主库my.cnf
[mysqld]
sql_mode = 'STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION'
#添加密码验证插件
plugin-load-add=validate_password.so
#服务器在启动时加载插件,并防止在服务器运行时删除插件
validate-password=FORCE_PLUS_PERMANENT
#binlog日志的基本文件名,binlog文件的索引文件,管理所有binlog文件
log-bin=/var/lib/mysql/mysql-bin
#配置serverid
server-id=1
#
# Remove leading # and set to the amount of RAM for the most important data
# cache in MySQL. Start at 70% of total RAM for dedicated server, else 10%.
# innodb_buffer_pool_size = 128M
#
# Remove leading # to turn on a very important data integrity option: logging
# changes to the binary log between backups.
# log_bin
#
# Remove leading # to set options mainly useful for reporting servers.
# The server defaults are faster for transactions and fast SELECTs.
# Adjust sizes as needed, experiment to find the optimal values.
# join_buffer_size = 128M
# sort_buffer_size = 2M
# read_rnd_buffer_size = 2M
datadir=/var/lib/mysql
socket=/var/lib/mysql/mysql.sock
# Disabling symbolic-links is recommended to prevent assorted security risks
symbolic-links=0
log-error=/var/log/mysqld.log
pid-file=/var/run/mysqld/mysqld.pid
character-set-server=utf8
#最大连接数
max_connections=1200
[client]
default-character-set=utf8
[mysql]
default-character-set=utf8
从库my.cnf
# For advice on how to change settings please see
# http://dev.mysql.com/doc/refman/5.7/en/server-configuration-defaults.html
[mysqld]
sql_mode = 'STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION'
#添加密码验证插件
plugin-load-add=validate_password.so
##服务器在启动时加载插件,并防止在服务器运行时删除插件
validate-password=FORCE_PLUS_PERMANENT
#
##binlog日志的基本文件名,binlog文件的索引文件,管理所有binlog文件
log-bin=/var/lib/mysql/mysql-bin
##配置serverid
server-id=2
##slave增量消费binlog参数
log-slave-updates = 1
#
#
# Remove leading # and set to the amount of RAM for the most important data
# cache in MySQL. Start at 70% of total RAM for dedicated server, else 10%.
# innodb_buffer_pool_size = 128M
#
# Remove leading # to turn on a very important data integrity option: logging
# changes to the binary log between backups.
# log_bin
#
# Remove leading # to set options mainly useful for reporting servers.
# The server defaults are faster for transactions and fast SELECTs.
# Adjust sizes as needed, experiment to find the optimal values.
# join_buffer_size = 128M
# sort_buffer_size = 2M
# read_rnd_buffer_size = 2M
datadir=/var/lib/mysql
socket=/var/lib/mysql/mysql.sock
# Disabling symbolic-links is recommended to prevent assorted security risks
symbolic-links=0
log-error=/var/log/mysqld.log
pid-file=/var/run/mysqld/mysqld.pid
log-slave-updates 决定从库是否把经 relay log 回放的主库变更写入自身的 binlog,是下游(如 Flink CDC)从该从库消费增量 binlog 的必要条件
##slave增量消费binlog参数
log-slave-updates = 1
配置主从关系
如果重启从库需要重新配置主库和从库
进入主库
#进入mysql添加授权远程复制用户sync
grant replication slave on *.* to 'sync'@'hostname' identified by '密码***';
#刷新
flush privileges;
#重新查看主库状态
show master status;
进入从库
#连接主库,进入mysql连接master数据库,并启动slave线程
change master to master_host='hostname',
master_user='sync',
master_password='密码***',
master_log_file='mysql-bin.000002',  -- 文件名需与主库 log-bin 基本名(mysql-bin)一致,取自 show master status 输出的 File 列
master_log_pos=599;
#启动从库
start slave;
#查看状态
show slave status\G
flink测试代码
import io.debezium.engine.DebeziumEngine;
import io.debezium.engine.format.Json;
import org.apache.flink.api.common.JobExecutionResult;
import org.apache.flink.api.common.JobID;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.PipelineOptions;
import org.apache.flink.configuration.RestOptions;
import org.apache.flink.core.execution.JobClient;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.*;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import java.util.Optional;
/*Session模式是预分配资源的,也就是提前根据指定的资源参数初始化一个Flink集群,
并常驻在YARN系统中,拥有固定数量的JobManager和TaskManager(注意JobManager只有一个)。
提交到这个集群的作业可以直接运行,免去每次分配资源的overhead。但是Session的资
源总量有限,多个作业之间又不是隔离的,故可能会造成资源的争用;如果有一个TaskManager宕机,
它上面承载着的所有作业也都会失败。另外,启动的作业越多,JobManager的负载也就越大。
所以,Session模式一般用来部署那些对延迟非常敏感但运行时长较短的作业。*/
/**
 * Submits a MySQL-CDC -> Kafka streaming job to an existing Flink session cluster.
 *
 * <p>Connects to the session cluster's REST endpoint on {@code cdh04:8081}, registers a
 * Kafka sink table ({@code flinkcdctest}, debezium-json format) and a mysql-cdc source
 * table ({@code testcdc}), then submits {@code INSERT INTO flinkcdctest SELECT * FROM testcdc}
 * and prints the resulting job id.
 */
public class SubmitMysqlToKafkaSession {
    public static void main(String[] args) throws Exception {
        // Attach to the running session cluster instead of spinning up a local/per-job cluster.
        StreamExecutionEnvironment env =
                StreamExecutionEnvironment.createRemoteEnvironment("cdh04", 8081);

        EnvironmentSettings bsSettings =
                EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build();
        StreamTableEnvironment stEnv = StreamTableEnvironment.create(env, bsSettings);

        // Job name shown in the Flink web UI / REST API.
        stEnv.getConfig().getConfiguration().setString(PipelineOptions.NAME, "mysql-kafka-earliest-offset");
        stEnv.getConfig().setSqlDialect(SqlDialect.DEFAULT);

        // Kafka sink: changelog records are written as debezium-json.
        // ('scan.startup.mode' is a source option and is ignored on a sink table.)
        stEnv.executeSql("CREATE TABLE flinkcdctest (\n" +
                "  `id` STRING,\n" +
                "  `name` STRING\n" +
                ") WITH (\n" +
                " 'connector' = 'kafka',\n" +
                " 'topic' = 'mysqloffset000000',\n" +
                " 'properties.bootstrap.servers' = 'CDH03:9092',\n" +
                " 'scan.startup.mode' = 'earliest-offset',\n" +
                " 'format' = 'debezium-json'\n" +
                ")");

        // MySQL CDC source.
        // NOTE(review): 'debezium.snapshot.mode' = 'initial' and 'scan.startup.mode' = 'timestamp'
        // are conflicting startup strategies — in timestamp mode the snapshot setting is not
        // effective. Confirm which startup behavior is intended and drop the other option.
        stEnv.executeSql("CREATE TABLE testcdc(\n" +
                "  `id` STRING,\n" +
                "  `name` STRING\n" +
                ") WITH (\n" +
                " 'connector' = 'mysql-cdc',\n" +
                " 'hostname' = 'CDH03',\n" +
                " 'port' = '3306',\n" +
                " 'username' = 'root',\n" +
                " 'database-name' = 'dwd_common_dynamic',\n" +
                " 'debezium.snapshot.mode' = 'initial',\n" +
                // Start reading the binlog from the given epoch-millis timestamp.
                " 'scan.startup.mode' = 'timestamp',\n" +
                " 'scan.startup.timestamp-millis' = '1669101959000',\n" +
                " 'scan.incremental.snapshot.enabled'='false',\n" +
                " 'table-name' = 'testcdc'\n" +
                ")");

        // executeSql on an INSERT submits the job asynchronously — no env.execute() is needed.
        TableResult tableResult = stEnv.executeSql("insert into flinkcdctest select * from testcdc");

        // The JobClient is present for submitted jobs; fail loudly rather than Optional.get().
        JobID jobID = tableResult.getJobClient()
                .orElseThrow(() -> new IllegalStateException("INSERT submission returned no JobClient"))
                .getJobID();
        System.out.println(jobID);
        System.out.println(stEnv.listCatalogs().toString());

        // For multiple INSERT statements in one job, use stEnv.createStatementSet().
    }
}
测试完成