首先编辑 MySQL 的 my.cnf,添加以下配置,然后重启 MySQL 服务(service mysqld restart):
#开启binlog的配置
log_bin=mysql_bin
binlog-format=Row
server-id=1
expire_logs_days = 7
max_binlog_size = 500M
#需要同步的数据库名称
binlog-do-db=cdctest
#忽略数据库
binlog-ignore-db=mysql
maven 依赖如下
<properties>
    <maven.compiler.source>8</maven.compiler.source>
    <maven.compiler.target>8</maven.compiler.target>
    <flink.version>1.14.5</flink.version>
    <java.version>1.8</java.version>
    <slf4j.version>1.7.30</slf4j.version>
    <scala.binary.version>2.12</scala.binary.version>
</properties>
<dependencies>
    <!-- Logging (declared once; the original listed slf4j-api and slf4j-log4j12 twice) -->
    <dependency>
        <groupId>org.slf4j</groupId>
        <artifactId>slf4j-api</artifactId>
        <version>${slf4j.version}</version>
    </dependency>
    <dependency>
        <groupId>org.slf4j</groupId>
        <artifactId>slf4j-log4j12</artifactId>
        <version>${slf4j.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>3.2.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hudi</groupId>
        <artifactId>hudi-flink1.14-bundle_${scala.binary.version}</artifactId>
        <version>0.11.1</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-api-scala-bridge_${scala.binary.version}</artifactId>
        <version>${flink.version}</version>
        <scope>provided</scope>
    </dependency>
    <!-- Declared once; the original listed flink-table-api-scala twice -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-api-scala_${scala.binary.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-planner_${scala.binary.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-clients_${scala.binary.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>com.ververica</groupId>
        <artifactId>flink-connector-mysql-cdc</artifactId>
        <version>2.3.0</version>
    </dependency>
    <dependency>
        <groupId>mysql</groupId>
        <artifactId>mysql-connector-java</artifactId>
        <version>8.0.27</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hive</groupId>
        <artifactId>hive-exec</artifactId>
        <version>3.1.2</version>
    </dependency>
</dependencies>
flink 代码如下
object FlinkCDC {
  /**
   * Streams MySQL CDC changes for `zht.myuserinfo` into a Hudi COPY_ON_WRITE
   * table, appending a load timestamp (`ts`) and a year partition column
   * (`years`) to every row.
   */
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment()
    val tableEnv: StreamTableEnvironment = StreamTableEnvironment.create(env)
    FlinkUtils.initKerberos()
    FlinkUtils.initCheckPoint(env)
    env.setParallelism(1)

    // Binlog position to start reading from (epoch millis); could be passed in via args.
    val timestamp = 1676472746000L

    // Source table: stream-read MySQL via the mysql-cdc connector.
    // Alternative startup modes: 'initial', 'earliest-offset', or 'timestamp'
    // combined with 'scan.startup.timestamp-millis'.
    tableEnv.executeSql("" +
      " CREATE TABLE user_info " +
      "( id int PRIMARY KEY NOT ENFORCED ," +
      " name STRING," +
      " sex STRING ) WITH ( " +
      " 'connector' = 'mysql-cdc'," +
      " 'hostname' = '127.0.0.1'," +
      " 'port' = '3306'," +
      " 'username' = 'root'," +
      " 'password' = '123456'," +
      " 'server-time-zone' = 'Asia/Shanghai'," +
      " 'database-name' = 'zht'," +
      " 'table-name' = 'myuserinfo'," +
      " 'scan.startup.mode' = 'timestamp'," +
      " 'scan.startup.timestamp-millis' = '" + timestamp + "'," +
      " 'scan.snapshot.fetch.size' ='1024'," +
      " 'debezium.mysql.include.schema.changes'='true'," +
      " 'debezium.snapshot.locking.mode' = 'none') ")

    // Enrich the change stream with a load timestamp and a year partition value.
    val date = new Date()
    val ts: Long = date.getTime
    val partitionpathFormat: SimpleDateFormat = new SimpleDateFormat("yyyy")
    val years = partitionpathFormat.format(date)
    // FIX: name the appended columns explicitly so they match the sink schema
    // (ts, years) by name as well as by position.
    val tstable: Table = tableEnv
      .sqlQuery("select * from user_info")
      .addColumns(lit(ts).as("ts"), lit(years).as("years"))
    tableEnv.toChangelogStream(tstable).print()

    // Hudi sink table.
    // NOTE(review): 'hive_sync.assume_date_partitioning' = 'true' normally implies
    // yyyy/mm/dd-style partitions, while this table partitions by year only — confirm.
    tableEnv.executeSql(
      """
        | CREATE TABLE hudi_user_info(
        | id int PRIMARY KEY NOT ENFORCED,
        | name String,
        | sex String,
        | ts bigint,
        | years String)
        | PARTITIONED BY (years)
        | WITH (
        | 'connector' = 'hudi',
        | 'path' = 'hdfs://xxxx:8020/datas/hudi_datas/hive/ods/test/hudi_user_info',
        | 'table.type' = 'COPY_ON_WRITE',
        | 'hoodie.datasource.write.recordkey.field' = 'id,ts',
        | 'hoodie.datasource.write.partitionpath.field'='years',
        | 'hoodie.datasource.write.hive_style_partitioning' = 'true',
        | 'hoodie.datasource.write.partitionpath.urlencode' = 'true',
        | 'hoodie.datasource.write.operation' ='upsert',
        | 'write.precombine' = 'true',
        | 'write.precombine.field' ='ts',
        | 'hoodie.index.type'='BLOOM',
        | 'hive_sync.enable' = 'true',
        | 'hive_sync.mode' = 'hms',
        | 'hive_sync.use_jdbc' = 'false',
        | 'hive_sync.metastore.uris' = 'thrift://xxxx:9083',
        | 'hive_sync.db' = 'hudi_ods',
        | 'changelog.enabled'= 'true',
        | 'hive_sync.table' = 'hudi_user_info',
        | 'hive_sync.assume_date_partitioning' = 'true',
        | 'hive_sync.partition_fields' = 'years',
        | 'hive_sync.support_timestamp'= 'true',
        | 'write.keygenerator.class' = 'org.apache.hudi.keygen.ComplexAvroKeyGenerator',
        | 'hive_sync.partition_extractor_class' = 'org.apache.hudi.hive.HiveStylePartitionValueExtractor')
        |""".stripMargin)

    // FIX: the original concatenated the Table object into the INSERT string,
    // relying on Table.toString's side effect of registering the table under a
    // generated name. Register it under an explicit, stable name instead.
    tableEnv.createTemporaryView("user_info_enriched", tstable)
    tableEnv.executeSql("insert into hudi_user_info select * from user_info_enriched")

    // Runs the DataStream part of the pipeline (the changelog print above);
    // the INSERT submitted via executeSql runs as its own job.
    env.execute()
  }
}
// Postgres CDC source for public.policy_declaration.
// FIX: the original declared 'schema-name' = 'public' twice in the WITH clause;
// duplicate table options are rejected by Flink SQL DDL validation.
tableEnv.executeSql(" CREATE TABLE `pg_policy_declaration` ( " +
  " `id` STRING " +
  " ,`declaration_start_date` DATE " +
  " ,`declaration_end_date` DATE " +
  " ,PRIMARY KEY ( `id` ) NOT ENFORCED " +
  ") WITH (" +
  " 'connector' = 'postgres-cdc'," +
  " 'hostname' = '127.0.0.1'," +
  " 'port' = '5432'," +
  " 'username' = 'postgres'," +
  " 'password' = '123456'," +
  " 'database-name' = 'cdctest'," +
  " 'schema-name' = 'public'," +
  " 'decoding.plugin.name'='pgoutput', " +
  " 'table-name' = 'policy_declaration'," +
  " 'debezium.snapshot.mode' = 'initial'," +
  " 'debezium.slot.retry.delay.ms' = '10000'," +
  " 'debezium.slot.drop.on.stop' = 'false', " +
  " 'debezium.slot.name' = 'policy_declaration_test' )");