1 TiDB测试集群,使用tiup进行安装部署和运维操作。
集群状态如下:
2 简要介绍一下TiDB binlog架构
TiDB Binlog集群主要分为Pump和Drainer两个组件,以及binlogctl工具:
Pump
Pump用于实时记录TiDB产生的Binlog,并将Binlog按照事务的提交时间进行排序,再提供给Drainer进行消费。
Drainer
Drainer从各个Pump中收集Binlog进行归并,再将Binlog转化成SQL或者指定格式的数据,最终同步到下游。
binlogctl工具
binlogctl是一个TiDB Binlog配套的运维工具,具有如下功能:
获取TiDB集群当前的TSO;
查看Pump/Drainer状态;
修改Pump/Drainer状态;
暂停/下线Pump/Drainer。
3 测试环境pump和drainer的部署位置和日志位置
[root@xxx-ticdc-23 drainer-8249]# ls bin/
drainer
[root@xxx-ticdc-23 drainer-8249]# ls conf/
drainer.toml
[root@xxx-ticdc-23 drainer-8249]# ls log/
drainer.log drainer_stderr.log
4 配置文件drainer.toml内容如
# WARNING: This file is auto-generated. Do not edit! All your modification will be overwritten!
# You can use 'tiup cluster edit-config' and 'tiup cluster reload' to update the configuration
# All configuration items you want to change can be added to:
# server_configs:
# drainer:
# aa.b1.c3: value
# aa.b2.c4: value
[syncer]
db-type = "tidb"
[syncer.to]
host = "192.168.1.88"
password = "TEST@2020"
port = 4000
user = "root"
drainer.toml是不建议直接修改的,下面会通过tiup进行编辑修改。
5 将tidb binlog同步到指定文件夹下面
比如在drainer节点创建文件夹/xxxxx/file_from_drainer; 修改drainer的下游配置:
# tiup cluster edit-config tidb-test
使用vi进行编辑即可,找到drainer_servers; 配置syncer.db-type和syncer.to.dir
drainer_servers:
- host: 192.168.1.23
ssh_port: 22
port: 8249
deploy_dir: /xxxxx/tidb-deploy/drainer-8249
data_dir: /xxxxx/tidb-data/drainer-8249
commit_ts: -1
config:
syncer.db-type: file
syncer.to.dir: /xxxxx/file_from_drainer
arch: amd64
os: linux
6 保存配置后reload drainer组件,默认会重启drainer
# tiup cluster reload tidb-test -N 192.168.1.23:8249
7 查看/xxxxx/file_from_drainer下的binlog文件
[root@xxx-ticdc-23 xxxxx]# ls file_from_drainer/
binlog-0000000000000000-20200827152650 binlog-0000000000000042-20200827160329 binlog-0000000000000084-20200827165204 binlog-0000000000000126-20200827182750
......
查看一下文件内容,恍恍惚惚看懂一点
[root@xxx-ticdc-23 xxxxx]# head -n 10 file_from_drainer/binlog-0000000000000000-20200827152650
�l
����������-
�
sbtestsbtest4"
idint��"
kint �"�
c�char"z�37632108549-35152435769-71163438075-76047767136-94022208193-37827536126-61826128934-31159094192-50908702293-70158721962"M
pad�char"=v48470296239-65598868380-44347847785-96102491963-02379738623
�
sbtestsbtest4"
idint��"
8 下面重点是将TiDB binlog同步到下游Kafka
修改drainer的配置为kafka,并重新reload drainer。
drainer_servers:
- host: 192.168.1.23
ssh_port: 22
port: 8249
deploy_dir: /xxxxx/tidb-deploy/drainer-8249
data_dir: /xxxxx/tidb-data/drainer-8249
commit_ts: -1
config:
syncer.db-type: kafka
syncer.to.kafka-addrs :"192.168.1.209:9092,192.168.1.210:9092,192.168.1.211:9092"
syncer.to.kafka-version :"2.0.0"
syncer.to.kafka-max-messages :1024
arch: amd64
os: linux
9 通过drainer的日志,可以完整的看到pump到drainer的初始化过程
略去了日志行开头格式:[2020/08/31 11:18:38.322 +08:00] [INFO];
go源代码文件:行号
[version.go:50] ["Welcome to Drainer"] ["Release Version"=v4.0.0-rc.2] ["Git Commit Hash"=a75036cf8933a581cac42c1007bf92c9e5417b90] ["Build TS"="2020-05-27 11:00:45"] ["Go Version"=go1.13] ["Go OS/Arch"=linux/amd64]
[main.go:46] ["start drainer..."] [config="{"log-level":"info","node-id":"192.168.1.23:8249","addr":"http://192.168.1.23:8249","advertise-addr":"http://192.168.1.23:8249","data-dir":"/xxxxx/tidb-data/drainer-8249","detect-interval":5,"pd-urls":"http://192.168.1.18:2379","log-file":"/xxxxx/tidb-deploy/drainer-8249/log/drainer.log","initial-commit-ts":-1,"sycner":{"sql-mode":null,"ignore-txn-commit-ts":null,"ignore-schemas":"INFORMATION_SCHEMA,PERFORMANCE_SCHEMA,mysql","ignore-table":null,"txn-batch":20,"loopback-control":false,"sync-ddl":true,"channel-id":0,"worker-count":1,"to":{"host":"","user":"","password":"","security":{"ssl-ca":"","ssl-cert":"","ssl-key":"","cert-allowed-cn":null},"encrypted_password":"","sync-mode":0,"port":0,"checkpoint":{"type":"","schema":"","host":"","user":"","password":"","encrypted_password":"","port":0},"dir":"","retention-time":0,"zookeeper-addrs":"","kafka-addrs":"192.168.1.209:9092,192.168.1.210:9092,192.168.1.211:9092","kafka-version":"2.0.0","kafka-max-messages":1024,"kafka-client-id":"","topic-name":""},"replicate-do-table":null,"replicate-do-db":null,"db-type":"kafka","relay":{"log-dir":"","max-file-size":10485760},"disable-dispatch-flag":null,"enable-dispatch-flag":null,"disable-dispatch":null,"enable-dispatch":null,"safe-mode":false,"disable-detect-flag":null,"enable-detect-flag":null,"disable-detect":null,"enable-detect":null},"security":{"ssl-ca":"","ssl-cert":"","ssl-key":"","cert-allowed-cn":null},"synced-check-time":5,"compressor":"","EtcdTimeout":5000000000,"MetricsAddr":"","MetricsInterval":15}"]
[client.go:135] ["[pd] create pd client with endpoints"] [pd-address="[http://192.168.1.18:2379]"]
[base_client.go:242] ["[pd] switch leader"] [new-leader=http://192.168.1.18:2379] [old-leader=]
[base_client.go:92] ["[pd] init cluster id"] [cluster-id=6865559301320830486]
[server.go:120] ["get cluster id from pd"] [id=6865559301320830486]
[server.go:129] ["set InitialCommitTS"] [ts=419127339796135937]
[checkpoint.go:64] ["initialize checkpoint"] [type=file] [checkpoint=419127247560770292] [cfg="{"CheckpointType":"file","Db":null,"Schema":"","Table":"","ClusterID":6865559301320830486,"InitialCommitTS":419127339796135937,"dir":"/xxxxx/tidb-data/drainer-8249/savepoint"}"]
[store.go:68] ["new store"] [path="tikv://192.168.1.18:2379?disableGC=true"]
[client.go:135] ["[pd] create pd client with endpoints"] [pd-address="[192.168.1.18:2379]"]
[base_client.go:242] ["[pd] switch leader"] [new-leader=http://192.168.1.18:2379] [old-leader=]
[base_client.go:92] ["[pd] init cluster id"] [cluster-id=6865559301320830486]
[store.go:74] ["new store with retry success"]
[client.go:127] ["[sarama] Initializing new client"]
[client.go:174] ["[sarama] Successfully initialized new client"]
[store.go:68] ["new store"] [path="tikv://192.168.1.18:2379?disableGC=true"]
[client.go:135] ["[pd] create pd client with endpoints"] [pd-address="[192.168.1.18:2379]"]
[base_client.go:242] ["[pd] switch leader"] [new-leader=http://192.168.1.18:2379] [old-leader=]
[base_client.go:92] ["[pd] init cluster id"] [cluster-id=6865559301320830486]
[store.go:74] ["new store with retry success"]
[server.go:265] ["register success"] ["drainer node id"=192.168.1.23:8249]
[server.go:326] ["start to server request"] [addr=http://192.168.1.23:8249]
[merge.go:222] ["merger add source"] ["source id"=192.168.1.17:8250]
[merge.go:222] ["merger add source"] ["source id"=192.168.1.18:8250]
[merge.go:222] ["merger add source"] ["source id"=192.168.1.23:8250]
[pump.go:138] ["pump create pull binlogs client"] [id=192.168.1.23:8250]
[pump.go:138] ["pump create pull binlogs client"] [id=192.168.1.17:8250]
[pump.go:138] ["pump create pull binlogs client"] [id=192.168.1.18:8250]
[collector.go:283] ["start query job"] [id=161] [binlog="tp:Commit start_ts:419127247678735203 commit_ts:419127247691842210 prewrite_key:"mDBs000000000000000372000000000000000000000H" ddl_query:"create database binlog_kafka_test" ddl_job_id:161 ddl_schema_state:5 "]
[collector.go:305] ["get ddl job"] [job="ID:161, Type:create schema, State:synced, SchemaState:public, SchemaID:160, TableID:0, RowCount:0, ArgLen:0, start time: 2020-08-31 11:12:45.669 +0800 CST, Err:<nil>, ErrCount:0, SnapshotVersion:0"]
[syncer.go:442] ["add ddl item to syncer, you can add this commit ts to `ignore-txn-commit-ts` to skip this ddl if needed"] [sql="create database binlog_kafka_test"] ["commit ts"=419127247691842210]
[client.go:772] ["[sarama] client/metadata fetching metadata for [6865559301320830486_obinlog] from broker 192.168.1.209:9092n"]
[broker.go:214] ["[sarama] Connected to broker at 192.168.1.209:9092 (unregistered)n"]
[collector.go:283] ["start query job"] [id=163] [binlog="tp:Commit start_ts:419127262974837310 commit_ts:419127262987944132 prewrite_key:"mDB:160000000375000000000000000000000H" ddl_query:"create table t1 (id int, tname varchar(55))" ddl_job_id:163 ddl_schema_state:5 "]
[collector.go:305] ["get ddl job"] [job="ID:163, Type:create table, State:synced, SchemaState:public, SchemaID:160, TableID:162, RowCount:0, ArgLen:0, start time: 2020-08-31 11:13:44.019 +0800 CST, Err:<nil>, ErrCount:0, SnapshotVersion:0"]
[syncer.go:442] ["add ddl item to syncer, you can add this commit ts to `ignore-txn-commit-ts` to skip this ddl if needed"] [sql="create table t1 (id int, tname varchar(55))"] ["commit ts"=419127262987944132]
[client.go:534] ["[sarama] client/brokers registered new broker #2 at test-hadoop-211.xxxxx:9092"]
[client.go:534] ["[sarama] client/brokers registered new broker #1 at test-hadoop-210.xxxxx:9092"]
[client.go:534] ["[sarama] client/brokers registered new broker #0 at test-hadoop-209.xxxxx:9092"]
[client.go:791] ["[sarama] client/metadata found some partitions to be leaderless"]
[client.go:759] ["[sarama] client/metadata retrying after 500ms... (10000 attempts remaining)n"]
[client.go:772] ["[sarama] client/metadata fetching metadata for [6865559301320830486_obinlog] from broker 192.168.1.209:9092n"]
[async_producer.go:711] ["[sarama] producer/broker/2 starting upn"]
[async_producer.go:727] ["[sarama] producer/broker/2 state change to [open] on 6865559301320830486_obinlog/0n"]
[broker.go:212] ["[sarama] Connected to broker at test-hadoop-211.xxxxx:9092 (registered as #2)n"]
[syncer.go:257] ["write save point"] [ts=419127247691842210]
[syncer.go:257] ["write save point"] [ts=419127262987944132]
......
9.1 日志中显示从最近的InitialCommitTS开始同步tidb binlog
419127339796135937
9.2 内容是创建了一个数据库和一张表
mysql连接tidb后的操作SQL
mysql> create database binlog_kafka_test;
Query OK, 0 rows affected (0.22 sec)
mysql> use binlog_kafka_test;
Database changed
mysql> create table t1 (id int, tname varchar(55));
Query OK, 0 rows affected (0.25 sec)
10 通过Kafka客户端Kafka Tool查看默认的topic【6865559301320830486_obinlog】和接收到的消息
一次commit(一个事务)一条message;一个事务里面多条insert update delete是在一条message里面; 注意message很可能会变得很长;具体message内容是protobuf编码的一段二进制数据。
一个短的:
080110A285F0D2ADC0C2E80522380A1162696E6C6F675F6B61666B615F7465737412001A216372656174652064617461626173652062696E6C6F675F6B61666B615F74657374
一个长的:
0800108280F0EB9FCDC2E8051AE1010A490A1162696E6C6F675F6B61666B615F74657374120274311A0B0A0269641203696E7418001A120A05746E616D651207766172636861721800220F0800120B0A0210030A053203535A500A490A1162696E6C6F675F6B61666B615F74657374120274311A0B0A0269641203696E7418001A120A05746E616D651207766172636861721800220F0800120B0A0210040A053203535A500A490A1162696E6C6F675F6B61666B615F74657374120274311A0B0A0269641203696E7418001A120A05746E616D651207766172636861721800220F0800120B0A0210050A053203535A50
11 通过Java KafkaConsumer解析消费TiDB binlog数据
Java源码:https://github.com/pingcap/tidb-tools/tree/master/tidb-binlog/driver/example/kafkaReader
public class Booter {
/**
* 主题
*/
private static String topic = "6865559301320830486_obinlog";
/**
* kafka brokers
*/
private static String serever = "192.168.1.209:9092,192.168.1.210:9092,192.168.1.211:9092";
/**
* 消费者偏移量
*/
private static long offset = 60;
/**
* 消费者线程
*/
private Thread kafkaConsumerThread;
/**
* 消费者
*/
private KafkaConsumer<String, byte[]> consumer;
public static void main(String[] args) {
Booter booter = new Booter();
booter.init();
}
public void init() {
Properties props = assembleConsumerProperties();
this.consumer = new KafkaConsumer(props);
consumer.assign(Arrays.asList(new TopicPartition(Booter.topic, 0)));
kafkaConsumerThread = new Thread(() -> {
Map<TopicPartition, OffsetAndMetadata> currentOffsets = new HashMap<>();
int initOffset = 0;
while (true) {
try {
// 指定分区消费的某个offset消费
consumer.seek(new TopicPartition(Booter.topic, 0), initOffset);
ConsumerRecords<String, byte[]> records = consumer.poll(200);
for (ConsumerRecord<String, byte[]> record : records) {
try {
//处理消息
dealMessage(record.value());
currentOffsets.put(new TopicPartition(Booter.topic, record.partition()), new OffsetAndMetadata(record.offset() + 1, "no metadata"));
//提交
consumer.commitSync(currentOffsets);
currentOffsets.clear();
//记录消息offset到db
} catch (Exception ie) {
//当前消息处理失败
currentOffsets.clear();
}
}
initOffset = initOffset + records.count();
} catch (Exception e) {
currentOffsets.clear();
}
}
});
kafkaConsumerThread.setName("kafkaConsumerThread");
kafkaConsumerThread.start();
}
private void dealMessage(byte[] value) throws Exception {
// byte[]解析
BinLogInfo.Binlog binlog = BinLogInfo.Binlog.parseFrom(value);
System.out.println(binlog.toString());
}
private Properties assembleConsumerProperties() {
Properties props = new Properties();
props.put("bootstrap.servers", Booter.serever);
props.put("group.id", "mytest");
//自动提交位移关闭
props.put("enable.auto.commit", "false");
props.put("auto.commit.interval.ms", "1000");
props.put("session.timeout.ms", "30000");
props.put("max.poll.records", "10");
//必须使用ByteArrayDeserializer
props.put("key.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
props.put("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
return props;
}
}
12 解析出来的TiDB binlog内容
12.1 DDL创建数据库和创建表
type: DDL
commit_ts: 419127247691842210
ddl_data {
schema_name: "binlog_kafka_test"
table_name: ""
ddl_query: "create database binlog_kafka_test"
}
type: DDL
commit_ts: 419127262987944132
ddl_data {
schema_name: "binlog_kafka_test"
table_name: "t1"
ddl_query: "create table t1 (id int, tname varchar(55))"
}
12.2 DDL更改表结构
type: DDL
commit_ts: 419133161306587253
ddl_data {
schema_name: "binlog_kafka_test"
table_name: "t1"
ddl_query: "alter table t1 add column(age int)"
}
12.3 DML一个事务插入多条数据
type: DML
commit_ts: 419127690662772738
dml_data {
tables {
schema_name: "binlog_kafka_test"
table_name: "t1"
column_info {
name: "id"
mysql_type: "int"
is_primary_key: false
}
column_info {
name: "tname"
mysql_type: "varchar"
is_primary_key: false
}
mutations {
type: Insert
row {
columns {
int64_value: 3
}
columns {
string_value: "SZP"
}
}
}
}
tables {
schema_name: "binlog_kafka_test"
table_name: "t1"
column_info {
name: "id"
mysql_type: "int"
is_primary_key: false
}
column_info {
name: "tname"
mysql_type: "varchar"
is_primary_key: false
}
mutations {
type: Insert
row {
columns {
int64_value: 4
}
columns {
string_value: "SZP"
}
}
}
}
tables {
schema_name: "binlog_kafka_test"
table_name: "t1"
column_info {
name: "id"
mysql_type: "int"
is_primary_key: false
}
column_info {
name: "tname"
mysql_type: "varchar"
is_primary_key: false
}
mutations {
type: Insert
row {
columns {
int64_value: 5
}
columns {
string_value: "SZP"
}
}
}
}
}
12.4 DML更新数据
type: DML
commit_ts: 419127661866254338
dml_data {
tables {
schema_name: "binlog_kafka_test"
table_name: "t1"
column_info {
name: "id"
mysql_type: "int"
is_primary_key: false
}
column_info {
name: "tname"
mysql_type: "varchar"
is_primary_key: false
}
mutations {
type: Update
row {
columns {
int64_value: 1
}
columns {
string_value: "SZP_sum"
}
}
change_row {
columns {
int64_value: 1
}
columns {
string_value: "SZP"
}
}
}
}
}
12.5 DML删除数据
type: DML
commit_ts: 419133138237915175
dml_data {
tables {
schema_name: "binlog_kafka_test"
table_name: "t1"
column_info {
name: "id"
mysql_type: "int"
is_primary_key: false
}
column_info {
name: "tname"
mysql_type: "varchar"
is_primary_key: false
}
mutations {
type: Delete
row {
columns {
int64_value: 1
}
columns {
string_value: "SZP_sum"
}
}
}
}
}
参考
https://pingcap.com/blog-cn/tidb-binlog-source-code-reading-2/https://docs.pingcap.com/zh/tidb/dev/tiup-clusterhttps://docs.pingcap.com/zh/tidb/stable/maintain-tidb-using-tiuphttps://docs.pingcap.com/zh/tidb/stable/tidb-binlog-overviewhttps://github.com/pingcap/tidb-tools/tree/master/tidb-binlog/driver/example/kafkaReaderhttps://github.com/pingcap/tidb-tools/tree/master/tidb-binlog/driver
一个批量移动zkdatalog文件的Linux脚本
# cat /proc/version
Linux version 3.10.0-1127.18.2.el7.x86_64 (mockbuild@kbuilder.bsys.centos.org) (gcc version 4.8.5 20150623 (Red Hat 4.8.5-39) (GCC) ) #1 SMP Sun Jul 26 15:27:06 UTC 2020
# find /xxxxx/admin/zookeeper/zkdatalog/version-2 -mtime +14 -name 'log.*' | xargs -i mv {} /yyyyy/zkdatalog_backup/
往期推荐:
怎么⼀劳永逸地解决数据安全问题?
交付速度和质量问题解决了,⽼板说还得“省”
今天的数据怎么又不对?!
数据模型⽆法复⽤,归根结底还是设计问题
数据仓库、数据湖、流批一体,终于有大神讲清楚了!
如何统⼀管理纷繁杂乱的数据指标?
项目管理实战20讲笔记(网易-雷蓓蓓)
元数据中⼼的关键⽬标和技术实现⽅案
Hive程序相关规范-有助于调优
HBase内部探险-数据模型
HBase内部探险-HBase是怎么存储数据的
HBase内部探险-一个KeyValue的历险
数据中台到底怎么建设呢?
到底什么样的企业应该建设数据中台?
数据中台到底是不是大数据的下一站?