环境
- Flink 1.15.3
- Doris 1.2.1
- Mysql 5.7.34
pom.xml
文件内容
<properties>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
<scala.version>2.12.11</scala.version>
<hadoop.version>3.1.1</hadoop.version>
<flink.version>1.15.3</flink.version>
<encoding>UTF-8</encoding>
</properties>
<dependencies>
<!-- flink -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-scala_2.12</artifactId>
<version>${flink.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-scala_2.12</artifactId>
<version>${flink.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-java</artifactId>
<version>${flink.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java</artifactId>
<version>${flink.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-api-java</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner-loader</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-runtime</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-jdbc_2.12</artifactId>
<version>1.10.0</version>
<!-- <scope>provided</scope> -->
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-api-scala-bridge_2.12</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-api-java-bridge</artifactId>
<version>${flink.version}</version>
</dependency>
<!-- JDBC connector -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-jdbc</artifactId>
<version>${flink.version}</version>
</dependency>
<!-- Kafka connector -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka</artifactId>
<version>${flink.version}</version>
</dependency>
<!-- Mysql CDC connector -->
<dependency>
<groupId>com.ververica</groupId>
<artifactId>flink-connector-mysql-cdc</artifactId>
<version>2.3.0</version>
</dependency>
<dependency>
<groupId>com.ververica</groupId>
<artifactId>flink-sql-connector-mysql-cdc</artifactId>
<version>2.3.0</version>
</dependency>
<!-- Doris connector -->
<dependency>
<groupId>org.apache.doris</groupId>
<artifactId>flink-doris-connector-1.15</artifactId>
<version>1.2.0</version>
</dependency>
</dependencies>
代码内容
package com.jin.flink;
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.TableEnvironment;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
/**
 * Flink SQL CDC demo: streams row changes from a MySQL table (via the
 * mysql-cdc connector) into a Doris table built on the Unique data model.
 *
 * @author Admin
 * @version 1.0
 */
public class FlinkSqlCDC {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Take a checkpoint every 120 seconds.
        env.enableCheckpointing(120000);
        // Require at least 30 seconds between the end of one checkpoint and the start of the next.
        env.getCheckpointConfig().setMinPauseBetweenCheckpoints(30000);
        // Retain the externalized checkpoint when the job is cancelled or stopped.
        env.getCheckpointConfig().setExternalizedCheckpointCleanup(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
        // A checkpoint must complete within 60 seconds, otherwise it is discarded.
        env.getCheckpointConfig().setCheckpointTimeout(60000);
        // Allow only one checkpoint in flight at a time.
        env.getCheckpointConfig().setMaxConcurrentCheckpoints(1);
        // EXACTLY_ONCE semantics (this is also the default).
        env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        // Checkpoint storage location. Flink expects a valid URI here; the original
        // backslash form ("file:\\\E:\...") is malformed and fails at runtime.
        env.getCheckpointConfig().setCheckpointStorage("file:///E:/TestData/checkpoint/");
        // Run in streaming mode.
        env.setRuntimeMode(RuntimeExecutionMode.STREAMING);
        TableEnvironment tableEnv = StreamTableEnvironment.create(env);
        // CDC source over MySQL table test_db.user_info; 'initial' takes a full
        // snapshot first, then switches to reading the binlog.
        tableEnv.executeSql("CREATE TABLE source_user_info(" +
                "`id` STRING, " +
                "`name` STRING, " +
                "`gender` STRING, " +
                "`age` INT, " +
                "`height` INT, " +
                "`weight` DOUBLE, " +
                "`communication_operator` STRING, " +
                "`phone_number` STRING, " +
                "`salary` INT, " +
                "`registration_date` DATE," +
                "PRIMARY KEY (`id`,`name`) NOT ENFORCED" +
                ")WITH (" +
                "'connector' = 'mysql-cdc', " +
                "'hostname' = 'lx01', " +
                "'port' = '3306', " +
                "'username' = 'root', " +
                "'password' = 'password'," +
                "'database-name' = 'test_db', " +
                "'table-name' = 'user_info', " +
                "'scan.startup.mode' = 'initial', " +
                "'heartbeat.interval' = '40s'" +
                ")"
        );
        // Doris sink. Same schema and primary key as the source table.
        tableEnv.executeSql("CREATE TABLE sink_user_info(" +
                "`id` STRING, " +
                "`name` STRING, " +
                "`gender` STRING, " +
                "`age` INT, " +
                "`height` INT, " +
                "`weight` DOUBLE, " +
                "`communication_operator` STRING, " +
                "`phone_number` STRING, " +
                "`salary` INT, " +
                "`registration_date` DATE, " +
                "PRIMARY KEY (`id`,`name`) NOT ENFORCED" +
                ")WITH (" +
                "'connector' = 'doris', " +
                "'fenodes' = 'lx01:8030', " +
                "'table.identifier' = 'db_test.user_info', " +
                "'username' = 'root', " +
                // Use '' for the password if Doris still has the empty default password.
                "'password' = 'password'," +
                // Enables delete propagation; the target table must use the Unique model.
                "'sink.enable-delete' = 'true'," +
                // Stream-load import format.
                "'sink.properties.format' = 'json'," +
                "'sink.properties.read_json_by_line' = 'true'," +
                // Stream-load label prefix; auto-generated when omitted.
                "'sink.label-prefix' = 'doris_label38'" +
                ")"
        );
        Table sourceTable = tableEnv.sqlQuery("SELECT `id`, `name`, `gender`, `age`, `height`, `weight`, `communication_operator`, `phone_number`, `salary`, `registration_date` FROM source_user_info");
        // Submits the MySQL -> Doris sync as an asynchronous job.
        tableEnv.executeSql("INSERT INTO sink_user_info SELECT * FROM source_user_info");
        // Debug aid: prints the CDC changelog to stdout. NOTE(review): this is an
        // unbounded stream, so print() blocks and the line below is effectively
        // unreachable while the job runs — drop this in production.
        sourceTable.execute().print();
        System.out.println("......");
    }
}
这里Doris
建表使用的Unique
模型,如果是其他模型的数据表,还要根据实际情况调整对应的配置参数.