1 TiDB测试集群,使用tiup进行安装部署和运维操作。
集群状态如下:
2 简要介绍一下TiDB binlog架构
TiDB Binlog集群主要分为Pump和Drainer两个组件,以及binlogctl工具:
Pump
Pump用于实时记录TiDB产生的Binlog,并将Binlog按照事务的提交时间进行排序,再提供给Drainer进行消费。
Drainer
Drainer从各个Pump中收集Binlog进行归并,再将Binlog转化成SQL或者指定格式的数据,最终同步到下游。
binlogctl工具
binlogctl是一个TiDB Binlog配套的运维工具,具有如下功能:
获取TiDB集群当前的TSO;
查看Pump/Drainer状态;
修改Pump/Drainer状态;
暂停/下线Pump/Drainer。
3 测试环境pump和drainer的部署位置和日志位置
[root@xxx-ticdc-23 drainer-8249]# ls bin/
drainer
[root@xxx-ticdc-23 drainer-8249]# ls conf/
drainer.toml
[root@xxx-ticdc-23 drainer-8249]# ls log/
drainer.log drainer_stderr.log
4 配置文件drainer.toml内容如
# WARNING: This file is auto-generated. Do not edit! All your modification will be overwritten!
# You can use 'tiup cluster edit-config' and 'tiup cluster reload' to update the configuration
# All configuration items you want to change can be added to:
# server_configs:
# drainer:
# aa.b1.c3: value
# aa.b2.c4: value
[syncer]
db-type = "tidb"
[syncer.to]
host = "192.168.1.88"
password = "TEST@2020"
port = 4000
user = "root"
drainer.toml是不建议直接修改的,下面会通过tiup进行编辑修改。
5 将tidb binlog同步到指定文件夹下面
比如在drainer节点创建文件夹/xxxxx/file_from_drainer;
修改drainer的下游配置:
# tiup cluster edit-config tidb-test
使用vi进行编辑即可,找到drainer_servers;
配置syncer.db-type和syncer.to.dir
drainer_servers:
- host: 192.168.1.23
ssh_port: 22
port: 8249
deploy_dir: /xxxxx/tidb-deploy/drainer-8249
data_dir: /xxxxx/tidb-data/drainer-8249
commit_ts: -1
config:
syncer.db-type: file
syncer.to.dir: /xxxxx/file_from_drainer
arch: amd64
os: linux
6 保存配置后reload drainer组件,默认会重启drainer
# tiup cluster reload tidb-test -N 192.168.1.23:8249
7 查看/xxxxx/file_from_drainer下的binlog文件
[root@xxx-ticdc-23 xxxxx]# ls file_from_drainer/
binlog-0000000000000000-20200827152650 binlog-0000000000000042-20200827160329 binlog-0000000000000084-20200827165204 binlog-0000000000000126-20200827182750
......
查看一下文件内容,恍恍惚惚看懂一点
[root@xxx-ticdc-23 xxxxx]# head -n 10 file_from_drainer/binlog-0000000000000000-20200827152650
�l
����������-
�
sbtestsbtest4"
idint��"
kintک�"�
c�char"z�37632108549-35152435769-71163438075-76047767136-94022208193-37827536126-61826128934-31159094192-50908702293-70158721962"M
pad�char"=v48470296239-65598868380-44347847785-96102491963-02379738623
�
sbtestsbtest4"
idint��"
8 下面重点是将TiDB binlog同步到下游Kafka
修改drainer的配置为kafka,并重新reload drainer。
drainer_servers:
- host: 192.168.1.23
ssh_port: 22
port: 8249
deploy_dir: /xxxxx/tidb-deploy/drainer-8249
data_dir: /xxxxx/tidb-data/drainer-8249
commit_ts: -1
config:
syncer.db-type: kafka
syncer.to.kafka-addrs :"192.168.1.209:9092,192.168.1.210:9092,192.168.1.211:9092"
syncer.to.kafka-version :"2.0.0"
syncer.to.kafka-max-messages :1024
arch: amd64
os: linux
9 通过drainer的日志,可以完整的看到pump到drainer的初始化过程
略去了日志行开头格式:[2020/08/31 11:18:38.322 +08:00] [INFO];
go源代码文件:行号
[version.go:50] ["Welcome to Drainer"] ["Release Version"=v4.0.0-rc.2] ["Git Commit Hash"=a75036cf8933a581cac42c1007bf92c9e5417b90] ["Build TS"="2020-05-27 11:00:45"] ["Go Version"=go1.13] ["Go OS/Arch"=linux/amd64]
[main.go:46] ["start drainer..."] [config="{\"log-level\":\"info\",\"node-id\":\"192.168.1.23:8249\",\"addr\":\"http://192.168.1.23:8249\",\"advertise-addr\":\"http://192.168.1.23:8249\",\"data-dir\":\"/xxxxx/tidb-data/drainer-8249\",\"detect-interval\":5,\"pd-urls\":\"http://192.168.1.18:2379\",\"log-file\":\"/xxxxx/tidb-deploy/drainer-8249/log/drainer.log\",\"initial-commit-ts\":-1,\"sycner\":{\"sql-mode\":null,\"ignore-txn-commit-ts\":null,\"ignore-schemas\":\"INFORMATION_SCHEMA,PERFORMANCE_SCHEMA,mysql\",\"ignore-table\":null,\"txn-batch\":20,\"loopback-control\":false,\"sync-ddl\":true,\"channel-id\":0,\"worker-count\":1,\"to\":{\"host\":\"\",\"user\":\"\",\"password\":\"\",\"security\":{\"ssl-ca\":\"\",\"ssl-cert\":\"\",\"ssl-key\":\"\",\"cert-allowed-cn\":null},\"encrypted_password\":\"\",\"sync-mode\":0,\"port\":0,\"checkpoint\":{\"type\":\"\",\"schema\":\"\",\"host\":\"\",\"user\":\"\",\"password\":\"\",\"encrypted_password\":\"\",\"port\":0},\"dir\":\"\",\"retention-time\":0,\"zookeeper-addrs\":\"\",\"kafka-addrs\":\"192.168.1.209:9092,192.168.1.210:9092,192.168.1.211:9092\",\"kafka-version\":\"2.0.0\",\"kafka-max-messages\":1024,\"kafka-client-id\":\"\",\"topic-name\":\"\"},\"replicate-do-table\":null,\"replicate-do-db\":null,\"db-type\":\"kafka\",\"relay\":{\"log-dir\":\"\",\"max-file-size\":10485760},\"disable-dispatch-flag\":null,\"enable-dispatch-flag\":null,\"disable-dispatch\":null,\"enable-dispatch\":null,\"safe-mode\":false,\"disable-detect-flag\":null,\"enable-detect-flag\":null,\"disable-detect\":null,\"enable-detect\":null},\"security\":{\"ssl-ca\":\"\",\"ssl-cert\":\"\",\"ssl-key\":\"\",\"cert-allowed-cn\":null},\"synced-check-time\":5,\"compressor\":\"\",\"EtcdTimeout\":5000000000,\"MetricsAddr\":\"\",\"MetricsInterval\":15}"]
[client.go:135] ["[pd] create pd client with endpoints"] [pd-address="[http://192.168.1.18:2379]"]
[base_client.go:242] ["[pd] switch leader"] [new-leader=http://192.168.1.18:2379] [old-leader=]
[base_client.go:92] ["[pd] init cluster id"] [cluster-id=6865559301320830486]
[server.go:120] ["get cluster id from pd"] [id=6865559301320830486]
[server.go:129] ["set InitialCommitTS"] [ts=419127339796135937]
[checkpoint.go:64] ["initialize checkpoint"] [type=file] [checkpoint=419127247560770292] [cfg="{\"CheckpointType\":\"file\",\"Db\":null,\"Schema\":\"\",\"Table\":\"\",\"ClusterID\":6865559301320830486,\"InitialCommitTS\":419127339796135937,\"dir\":\"/xxxxx/tidb-data/drainer-8249/savepoint\"}"]
[store.go:68] ["new store"] [path="tikv://192.168.1.18:2379?disableGC=true"]
[client.go:135] ["[pd] create pd client with endpoints"] [pd-address="[192.168.1.18:2379]"]
[base_client.go:242] ["[pd] switch leader"] [new-leader=http://192.168.1.18:2379] [old-leader=]
[base_client.go:92] ["[pd] init cluster id"] [cluster-id=6865559301320830486]
[store.go:74] ["new store with retry success"]
[client.go:127] ["[sarama] Initializing new client"]
[client.go:174] ["[sarama] Successfully initialized new client"]
[store.go:68] ["new store"] [path="tikv://192.168.1.18:2379?disableGC=true"]
[client.go:135] ["[pd] create pd client with endpoints"] [pd-address="[192.168.1.18:2379]"]
[base_client.go:242] ["[pd] switch leader"] [new-leader=http://192.168.1.18:2379] [old-leader=]
[base_client.go:92] ["[pd] init cluster id"] [cluster-id=6865559301320830486]
[store.go:74] ["new store with retry success"]
[server.go:265] ["register success"] ["drainer node id"=192.168.1.23:8249]
[server.go:326] ["start to server request"] [addr=http://192.168.1.23:8249]
[merge.go:222] ["merger add source"] ["source id"=192.168.1.17:8250]
[merge.go:222] ["merger add source"] ["source id"=192.168.1.18:8250]
[merge.go:222] ["merger add source"] ["source id"=192.168.1.23:8250]
[pump.go:138] ["pump create pull binlogs client"] [id=192.168.1.23:8250]
[pump.go:138] ["pump create pull binlogs client"] [id=192.168.1.17:8250]
[pump.go:138] ["pump create pull binlogs client"] [id=192.168.1.18:8250]
[collector.go:283] ["start query job"] [id=161] [binlog="tp:Commit start_ts:419127247678735203 commit_ts:419127247691842210 prewrite_key:\"mDBs\\000\\000\\000\\000\\000\\372\\000\\000\\000\\000\\000\\000\\000H\" ddl_query:\"create database binlog_kafka_test\" ddl_job_id:161 ddl_schema_state:5 "]
[collector.go:305] ["get ddl job"] [job="ID:161, Type:create schema, State:synced, SchemaState:public, SchemaID:160, TableID:0, RowCount:0, ArgLen:0, start time: 2020-08-31 11:12:45.669 +0800 CST, Err:<nil>, ErrCount:0, SnapshotVersion:0"]
[syncer.go:442] ["add ddl item to syncer, you can add this commit ts to `ignore-txn-commit-ts` to skip this ddl if needed"] [sql="create database binlog_kafka_test"] ["commit ts"=419127247691842210]
[client.go:772] ["[sarama] client/metadata fetching metadata for [6865559301320830486_obinlog] from broker 192.168.1.209:9092\n"]
[broker.go:214] ["[sarama] Connected to broker at 192.168.1.209:9092 (unregistered)\n"]
[collector.go:283] ["start query job"] [id=163] [binlog="tp:Commit start_ts:419127262974837310 commit_ts:419127262987944132 prewrite_key:\"mDB:160\\000\\000\\375\\000\\000\\000\\000\\000\\000\\000H\" ddl_query:\"create table t1 (id int, tname varchar(55))\" ddl_job_id:163 ddl_schema_state:5 "]
[collector.go:305] ["get ddl job"] [job="ID:163, Type:create table, State:synced, SchemaState:public, SchemaID:160, TableID:162, RowCount:0, ArgLen:0, start time: 2020-08-31 11:13:44.019 +0800 CST, Err:<nil>, ErrCount:0, SnapshotVersion:0"]
[syncer.go:442] ["add ddl item to syncer, you can add this commit ts to `ignore-txn-commit-ts` to skip this ddl if needed"] [sql="create table t1 (id int, tname varchar(55))"] ["commit ts"=419127262987944132]
[client.go:534] ["[sarama] client/brokers registered new broker #2 at test-hadoop-211.xxxxx:9092"]
[client.go:534] ["[sarama] client/brokers registered new broker #1 at test-hadoop-210.xxxxx:9092"]
[client.go:534] ["[sarama] client/brokers registered new broker #0 at test-hadoop-209.xxxxx:9092"]
[client.go:791] ["[sarama] client/metadata found some partitions to be leaderless"]
[client.go:759] ["[sarama] client/metadata retrying after 500ms... (10000 attempts remaining)\n"]
[client.go:772] ["[sarama] client/metadata fetching metadata for [6865559301320830486_obinlog] from broker 192.168.1.209:9092\n"]
[async_producer.go:711] ["[sarama] producer/broker/2 starting up\n"]
[async_producer.go:727] ["[sarama] producer/broker/2 state change to [open] on 6865559301320830486_obinlog/0\n"]
[broker.go:212] ["[sarama] Connected to broker at test-hadoop-211.xxxxx:9092 (registered as #2)\n"]
[syncer.go:257] ["write save point"] [ts=419127247691842210]
[syncer.go:257] ["write save point"] [ts=419127262987944132]
......
9.1 日志中显示从最近的InitialCommitTS开始同步tidb binlog
419127339796135937
9.2 内容是创建了一个数据库和一张表
mysql连接tidb后的操作SQL
mysql> create database binlog_kafka_test;
Query OK, 0 rows affected (0.22 sec)
mysql> use binlog_kafka_test;
Database changed
mysql> create table t1 (id int, tname varchar(55));
Query OK, 0 rows affected (0.25 sec)
10 通过Kafka客户端Kafka Tool查看默认的topic【6865559301320830486_obinlog】和接收到的消息
一次commit(一个事务)一条message;一个事务里面多条insert update delete是在一条message里面;
注意message很可能会变得很长;具体message内容是protobuf编码的一段二进制数据。
一个短的:
080110A285F0D2ADC0C2E80522380A1162696E6C6F675F6B61666B615F7465737412001A216372656174652064617461626173652062696E6C6F675F6B61666B615F74657374
一个长的:
0800108280F0EB9FCDC2E8051AE1010A490A1162696E6C6F675F6B61666B615F74657374120274311A0B0A0269641203696E7418001A120A05746E616D651207766172636861721800220F0800120B0A0210030A053203535A500A490A1162696E6C6F675F6B61666B615F74657374120274311A0B0A0269641203696E7418001A120A05746E616D651207766172636861721800220F0800120B0A0210040A053203535A500A490A1162696E6C6F675F6B61666B615F74657374120274311A0B0A0269641203696E7418001A120A05746E616D651207766172636861721800220F0800120B0A0210050A053203535A50
11 通过Java KafkaConsumer解析消费TiDB binlog数据
Java源码:
https://github.com/pingcap/tidb-tools/tree/master/tidb-binlog/driver/example/kafkaReader
public class Booter {
/**
* 主题
*/
private static String topic = "6865559301320830486_obinlog";
/**
* kafka brokers
*/
private static String serever = "192.168.1.209:9092,192.168.1.210:9092,192.168.1.211:9092";
/**
* 消费者偏移量
*/
private static long offset = 60;
/**
* 消费者线程
*/
private Thread kafkaConsumerThread;
/**
* 消费者
*/
private KafkaConsumer<String, byte[]> consumer;
public static void main(String[] args) {
Booter booter = new Booter();
booter.init();
}
public void init() {
Properties props = assembleConsumerProperties();
this.consumer = new KafkaConsumer(props);
consumer.assign(Arrays.asList(new TopicPartition(Booter.topic, 0)));
kafkaConsumerThread = new Thread(() -> {
Map<TopicPartition, OffsetAndMetadata> currentOffsets = new HashMap<>();
int initOffset = 0;
while (true) {
try {
// 指定分区消费的某个offset消费
consumer.seek(new TopicPartition(Booter.topic, 0), initOffset);
ConsumerRecords<String, byte[]> records = consumer.poll(200);
for (ConsumerRecord<String, byte[]> record : records) {
try {
//处理消息
dealMessage(record.value());
currentOffsets.put(new TopicPartition(Booter.topic, record.partition()), new OffsetAndMetadata(record.offset() + 1, "no metadata"));
//提交
consumer.commitSync(currentOffsets);
currentOffsets.clear();
//记录消息offset到db
} catch (Exception ie) {
//当前消息处理失败
currentOffsets.clear();
}
}
initOffset = initOffset + records.count();
} catch (Exception e) {
currentOffsets.clear();
}
}
});
kafkaConsumerThread.setName("kafkaConsumerThread");
kafkaConsumerThread.start();
}
private void dealMessage(byte[] value) throws Exception {
// byte[]解析
BinLogInfo.Binlog binlog = BinLogInfo.Binlog.parseFrom(value);
System.out.println(binlog.toString());
}
private Properties assembleConsumerProperties() {
Properties props = new Properties();
props.put("bootstrap.servers", Booter.serever);
props.put("group.id", "mytest");
//自动提交位移关闭
props.put("enable.auto.commit", "false");
props.put("auto.commit.interval.ms", "1000");
props.put("session.timeout.ms", "30000");
props.put("max.poll.records", "10");
//必须使用ByteArrayDeserializer
props.put("key.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
props.put("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
return props;
}
}
12 解析出来的TiDB binlog内容
12.1 DDL创建数据库和创建表
type: DDL
commit_ts: 419127247691842210
ddl_data {
schema_name: "binlog_kafka_test"
table_name: ""
ddl_query: "create database binlog_kafka_test"
}
type: DDL
commit_ts: 419127262987944132
ddl_data {
schema_name: "binlog_kafka_test"
table_name: "t1"
ddl_query: "create table t1 (id int, tname varchar(55))"
}
12.2 DDL更改表结构
type: DDL
commit_ts: 419133161306587253
ddl_data {
schema_name: "binlog_kafka_test"
table_name: "t1"
ddl_query: "alter table t1 add column(age int)"
}
12.3 DML一个事务插入多条数据
type: DML
commit_ts: 419127690662772738
dml_data {
tables {
schema_name: "binlog_kafka_test"
table_name: "t1"
column_info {
name: "id"
mysql_type: "int"
is_primary_key: false
}
column_info {
name: "tname"
mysql_type: "varchar"
is_primary_key: false
}
mutations {
type: Insert
row {
columns {
int64_value: 3
}
columns {
string_value: "SZP"
}
}
}
}
tables {
schema_name: "binlog_kafka_test"
table_name: "t1"
column_info {
name: "id"
mysql_type: "int"
is_primary_key: false
}
column_info {
name: "tname"
mysql_type: "varchar"
is_primary_key: false
}
mutations {
type: Insert
row {
columns {
int64_value: 4
}
columns {
string_value: "SZP"
}
}
}
}
tables {
schema_name: "binlog_kafka_test"
table_name: "t1"
column_info {
name: "id"
mysql_type: "int"
is_primary_key: false
}
column_info {
name: "tname"
mysql_type: "varchar"
is_primary_key: false
}
mutations {
type: Insert
row {
columns {
int64_value: 5
}
columns {
string_value: "SZP"
}
}
}
}
}
12.4 DML更新数据
type: DML
commit_ts: 419127661866254338
dml_data {
tables {
schema_name: "binlog_kafka_test"
table_name: "t1"
column_info {
name: "id"
mysql_type: "int"
is_primary_key: false
}
column_info {
name: "tname"
mysql_type: "varchar"
is_primary_key: false
}
mutations {
type: Update
row {
columns {
int64_value: 1
}
columns {
string_value: "SZP_sum"
}
}
change_row {
columns {
int64_value: 1
}
columns {
string_value: "SZP"
}
}
}
}
}
12.5 DML删除数据
type: DML
commit_ts: 419133138237915175
dml_data {
tables {
schema_name: "binlog_kafka_test"
table_name: "t1"
column_info {
name: "id"
mysql_type: "int"
is_primary_key: false
}
column_info {
name: "tname"
mysql_type: "varchar"
is_primary_key: false
}
mutations {
type: Delete
row {
columns {
int64_value: 1
}
columns {
string_value: "SZP_sum"
}
}
}
}
}
参考
https://pingcap.com/blog-cn/tidb-binlog-source-code-reading-2/
https://docs.pingcap.com/zh/tidb/dev/tiup-cluster
https://docs.pingcap.com/zh/tidb/stable/maintain-tidb-using-tiup
https://docs.pingcap.com/zh/tidb/stable/tidb-binlog-overview
https://github.com/pingcap/tidb-tools/tree/master/tidb-binlog/driver/example/kafkaReader
https://github.com/pingcap/tidb-tools/tree/master/tidb-binlog/driver
一个批量移动zkdatalog文件的Linux脚本
# cat /proc/version
Linux version 3.10.0-1127.18.2.el7.x86_64 (mockbuild@kbuilder.bsys.centos.org) (gcc version 4.8.5 20150623 (Red Hat 4.8.5-39) (GCC) ) #1 SMP Sun Jul 26 15:27:06 UTC 2020
# find /xxxxx/admin/zookeeper/zkdatalog/version-2 -mtime +14 -name 'log.*' | xargs -i mv {} /yyyyy/zkdatalog_backup/
往期推荐: