1. pom.xml
<!-- Flink -->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-table-planner-blink_2.12</artifactId>
    <version>1.13.6</version>
</dependency>
<dependency>
    <groupId>mysql</groupId>
    <artifactId>mysql-connector-java</artifactId>
    <version>5.1.49</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-jdbc_2.12</artifactId>
    <version>1.13.6</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>3.1.3</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-clients_2.12</artifactId>
    <version>1.13.6</version>
</dependency>
<dependency>
    <groupId>com.alibaba.ververica</groupId>
    <artifactId>flink-connector-mysql-cdc</artifactId>
    <version>1.4.0</version>
</dependency>
<dependency>
    <groupId>ru.yandex.clickhouse</groupId>
    <artifactId>clickhouse-jdbc</artifactId>
    <version>0.3.2</version>
</dependency>
<!-- Logging dependencies -->
<dependency>
    <groupId>org.slf4j</groupId>
    <artifactId>slf4j-api</artifactId>
    <version>${slf4j.version}</version>
</dependency>
<dependency>
    <groupId>org.slf4j</groupId>
    <artifactId>slf4j-log4j12</artifactId>
    <version>${slf4j.version}</version>
</dependency>
<dependency>
    <groupId>org.apache.logging.log4j</groupId>
    <artifactId>log4j-to-slf4j</artifactId>
    <version>2.14.0</version>
</dependency>
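Note that the pom references ${slf4j.version}, which must be defined in the project's <properties> section (Flink 1.13 itself is built against slf4j 1.7.x). The slf4j-log4j12 binding also expects a log4j.properties on the classpath (typically src/main/resources); without one the job only prints "no appenders" warnings. A minimal sketch; the log level and pattern here are assumptions, not from the original setup:

log4j.rootLogger=error, stdout
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.Target=System.out
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d %p %c{1}: %m%n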
2. Code
package flink.dwd;

import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.runtime.state.filesystem.FsStateBackend;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;

public class DwdTest {
    public static void main(String[] args) throws Exception {
        // 1. Create the execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // Checkpoint configuration: start a checkpoint every 120 s
        env.enableCheckpointing(120000);
        // Exactly-once mode (this is the default)
        env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        // A checkpoint must complete within three minutes or it is discarded
        env.getCheckpointConfig().setCheckpointTimeout(180000);
        // Tolerate two consecutive checkpoint failures
        env.getCheckpointConfig().setTolerableCheckpointFailureNumber(2);
        // Allow only one checkpoint to be in flight at a time
        env.getCheckpointConfig().setMaxConcurrentCheckpoints(1);
        // Use externalized checkpoints so they are retained after the job is cancelled
        env.getCheckpointConfig().setExternalizedCheckpointCleanup(
                CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
        // Restart strategy used when recovering from a checkpoint
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 2000L));
        // State backend on HDFS
        env.setStateBackend(new FsStateBackend("hdfs://hadoop01:8020/flinkhdfs"));
        // User name for accessing HDFS
        System.setProperty("HADOOP_USER_NAME", "renhuifeng");

        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        // 2. Create the Flink-MySQL-CDC sources
        tableEnv.executeSql("CREATE TABLE c_building (" +
                " id STRING," +
                " name STRING," +
                " housing_estate_id STRING," +
                " PRIMARY KEY (id) NOT ENFORCED" +
                ") WITH (" +
                " 'connector' = 'mysql-cdc'," +
                " 'hostname' = 'hadoop01'," +
                " 'port' = '3306'," +
                " 'username' = 'root'," +
                " 'password' = '518716'," +
                " 'database-name' = 'smart_village'," +
                " 'table-name' = 'c_building'" +
                ")");

        tableEnv.executeSql("CREATE TABLE c_building_unit (" +
                " id STRING," +
                " name STRING," +
                " building_id STRING," +
                " PRIMARY KEY (id) NOT ENFORCED" +
                ") WITH (" +
                " 'connector' = 'mysql-cdc'," +
                " 'hostname' = 'hadoop01'," +
                " 'port' = '3306'," +
                " 'username' = 'root'," +
                " 'password' = '518716'," +
                " 'database-name' = 'smart_village'," +
                " 'table-name' = 'c_building_unit'" +
                ")");

        // Sanity check of the CDC sources (confirmed working):
        //tableEnv.executeSql("select c.id, c.name, u.building_id from c_building c left join c_building_unit u on c.id = u.building_id");

        // 3. Create the downstream table, a JDBC sink back into MySQL
        tableEnv.executeSql("CREATE TABLE dwd_test (" +
                " id STRING," +
                " name STRING," +
                " building_id STRING," +
                " PRIMARY KEY (id, building_id) NOT ENFORCED" +
                ") WITH (" +
                " 'connector' = 'jdbc'," +
                " 'url' = 'jdbc:mysql://hadoop01:3306/smart_village?characterEncoding=utf-8&useSSL=false'," +
                " 'username' = 'root'," +
                " 'password' = '518716'," +
                " 'table-name' = 'dwd_test'" +
                ")");

        // Alternative downstream table: ClickHouse via the JDBC connector
        /*
        tableEnv.executeSql("CREATE TABLE dwd_test (" +
                " id STRING," +
                " name STRING," +
                " building_id STRING," +
                " PRIMARY KEY (id, building_id) NOT ENFORCED" +
                ") WITH (" +
                " 'connector' = 'jdbc'," +
                " 'url' = 'jdbc:clickhouse://hdp01:8123/default'," +
                " 'table-name' = 'dwd_test'" +
                ")");
        */

        // 4. Wire the CDC sources to the downstream table.
        // executeSql() submits the INSERT job by itself, so no env.execute() is needed here.
        tableEnv.executeSql("INSERT INTO dwd_test" +
                " SELECT c.id, c.name," +
                " IF(u.building_id IS NULL, '99999', u.building_id) AS building_id" +
                " FROM c_building c LEFT JOIN c_building_unit u ON c.id = u.building_id").print();

        // Alternative: convert the join result to a retract stream and write it to ClickHouse
        /*
        Table table = tableEnv.sqlQuery("select c.id, c.name," +
                " if(u.building_id is null, '-1', u.building_id) as building_id," +
                " UNIX_TIMESTAMP() * 1000 ts" +
                " from c_building c left join c_building_unit u on c.id = u.building_id");
        DataStream<Tuple2<Boolean, TestBean>> retractStream = tableEnv.toRetractStream(table, TestBean.class);
        //retractStream.print();
        DataStream<TestBean> datas = retractStream.flatMap(new FlatMapFunction<Tuple2<Boolean, TestBean>, TestBean>() {
            @Override
            public void flatMap(Tuple2<Boolean, TestBean> value, Collector<TestBean> collector) throws Exception {
                collector.collect(value.f1);
            }
        });
        datas.print();
        // TODO: write the data out to ClickHouse
        datas.addSink(MyClickHouseUtil.getSink("insert into dwd_test values(?,?,?,?)"));
        // env.execute() is only required for this DataStream variant
        env.execute("Flink_CDC_Test");
        */
    }
}
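The commented-out DataStream variant at the end of the class references a TestBean POJO that is not shown. A minimal sketch inferred from the four columns the retract query selects (id, name, building_id, ts); the field names must match the column names for toRetractStream to map them, everything else here is an assumption:

package flink.dwd;

// POJO matching the four columns of the retract-stream query:
// id, name, building_id, ts. Flink's POJO serializer requires a
// public no-arg constructor and public fields (or getters/setters).
public class TestBean {
    public String id;
    public String name;
    public String building_id;
    public Long ts;

    public TestBean() {}

    public TestBean(String id, String name, String building_id, Long ts) {
        this.id = id;
        this.name = name;
        this.building_id = building_id;
        this.ts = ts;
    }
}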
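MyClickHouseUtil is not shown either. One plausible sketch of getSink, built on the flink-connector-jdbc JdbcSink and the clickhouse-jdbc driver already declared in the pom; the batch settings are assumptions, and the hdp01:8123 address is copied from the commented-out ClickHouse DDL:

package flink.dwd;

import org.apache.flink.connector.jdbc.JdbcConnectionOptions;
import org.apache.flink.connector.jdbc.JdbcExecutionOptions;
import org.apache.flink.connector.jdbc.JdbcSink;
import org.apache.flink.streaming.api.functions.sink.SinkFunction;

// Hypothetical helper; the original post does not include this class.
public class MyClickHouseUtil {
    public static SinkFunction<TestBean> getSink(String sql) {
        return JdbcSink.sink(
                sql,
                // Bind the four TestBean fields to the four '?' placeholders
                (statement, bean) -> {
                    statement.setString(1, bean.id);
                    statement.setString(2, bean.name);
                    statement.setString(3, bean.building_id);
                    statement.setLong(4, bean.ts);
                },
                JdbcExecutionOptions.builder()
                        .withBatchSize(5)          // flush every 5 rows (assumption)
                        .withBatchIntervalMs(1000) // or at least once per second
                        .build(),
                new JdbcConnectionOptions.JdbcConnectionOptionsBuilder()
                        .withDriverName("ru.yandex.clickhouse.ClickHouseDriver")
                        .withUrl("jdbc:clickhouse://hdp01:8123/default")
                        .build());
    }
}

One caveat about the commented-out variant itself: its flatMap forwards retract (false) rows as well as insert (true) rows, so a plain INSERT sink like this will re-append deleted records; filtering on the Boolean flag would avoid that.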