mysql-binlog-client基于mysql-replication模拟slave,监听binlog的变化,包括增加、修改、删除。创建binlog流对象,根据监听事件获取变化的数据,进行数据同步。假设以ClickHouse为目标库,要求每秒提交在1000条以上,因此必须批量提交,并设置每次提交的行数上限(生产环境建议每批10万条,本示例为演示取1000条)。当数据变化较少、批量不足上限时,也需要按时间间隔强制提交一次,比如:3秒。
目录结构如下:
settings.py
import os
from os.path import abspath, dirname, join
from starlette.config import Config
from starlette.datastructures import URL
# Environment-backed configuration: values come from a local ".env" file
# or from process environment variables, with development-safe defaults.
config = Config(".env")

# Git / build metadata, injected at image build time (see Dockerfile ARG/ENV).
GIT_COMMIT = config('GIT_COMMIT', cast=str, default="Unknown")
GIT_COMMIT_SHORT = config('GIT_COMMIT_SHORT', cast=str, default="Unknown")
GIT_LOCAL_BRANCH = config('GIT_LOCAL_BRANCH', cast=str, default="Unknown")
BRANCH_NAME = config('BRANCH_NAME', cast=str, default="Unknown")
DOCKER_IMAGE_NAME = config('DOCKER_IMAGE_NAME', cast=str, default="Unknown")
DOCKER_IMAGE_VERSION = config('DOCKER_IMAGE_VERSION', cast=str, default="Unknown")

# Application environment name (defaults to the production profile).
APP_ENV = config('APP_ENV', cast=str, default="products")

# Debug flag; starlette's Config casts "true"/"false" strings to bool.
DEBUG = config('DEBUG', cast=bool, default=False)

# MySQL connection settings for the binlog client.
MYSQL_HOST = config('MYSQL_HOST', cast=str, default="127.0.0.1")
# Fix: the default now matches the declared cast type (int, not the
# string "3306") so the value is consistently an int either way.
MYSQL_PORT = config('MYSQL_PORT', cast=int, default=3306)
MYSQL_USER = config('MYSQL_USER', cast=str, default="root")
MYSQL_PASSWORD = config('MYSQL_PASSWORD', cast=str, default="root")
main.py
import datetime
import json
import time
from app.settings import MYSQL_HOST, MYSQL_PORT, MYSQL_PASSWORD, MYSQL_USER
from pymysqlreplication import BinLogStreamReader
from pymysqlreplication.row_event import (
DeleteRowsEvent,
UpdateRowsEvent,
WriteRowsEvent
)
# Connection parameters for the source MySQL server, assembled from the
# environment-driven settings module.
mysql_settings = dict(
    host=MYSQL_HOST,
    port=MYSQL_PORT,
    user=MYSQL_USER,
    passwd=MYSQL_PASSWORD,
)
def main(batch_max_rows=1000, flush_interval=3):
    """Stream MySQL binlog row events and batch them per table.

    Connects as a fake replication slave via ``BinLogStreamReader`` and
    buffers the row data of insert/update events per table.  A table's
    buffer is flushed (here just printed — replace the print with the
    ClickHouse bulk insert) when it reaches ``batch_max_rows`` rows, or
    when more than ``flush_interval`` seconds have passed since the last
    flush, so a slow trickle of changes still gets committed.

    Note: the stream is blocking, so the time-based check only fires when
    the next event arrives — TODO confirm this latency is acceptable, or
    add a background timer for fully idle periods.

    Args:
        batch_max_rows: flush a table's buffer once it holds this many rows
            (default 1000, per the stated per-commit cap).
        flush_interval: maximum seconds between flushes (default 3).
    """
    stream = BinLogStreamReader(
        connection_settings=mysql_settings,
        server_id=100,    # slave identifier, must be unique per server
        blocking=True,    # keep waiting for subsequent events
        # Only watch write operations: inserts and updates.
        only_events=[WriteRowsEvent, UpdateRowsEvent],
    )

    last_flush = time.time()
    table_data_dict = {}

    def _flush(table_name):
        """Commit one table's buffered rows and reset the flush timer."""
        nonlocal last_flush
        rows = table_data_dict.get(table_name, [])
        elapsed = int(time.time() - last_flush)
        # Placeholder for the real bulk insert into the target store.
        print(elapsed, table_name, len(rows))
        table_data_dict[table_name] = []
        last_flush = time.time()

    for binlogevent in stream:
        for row in binlogevent.rows:
            # Both branches did the same buffering; only the payload key
            # differs, so extract it once instead of duplicating the logic.
            if isinstance(binlogevent, WriteRowsEvent):
                data = row["values"]          # inserted row
            elif isinstance(binlogevent, UpdateRowsEvent):
                data = row["after_values"]    # row state after the update
            else:
                # Unreachable given only_events, kept for safety.
                continue

            table_name = binlogevent.table
            table_data_dict.setdefault(table_name, []).append(data)

            # Fix: flush at >= batch_max_rows (the original ">" let the
            # batch grow one row past the stated cap), or on elapsed time.
            if (len(table_data_dict[table_name]) >= batch_max_rows
                    or time.time() - last_flush > flush_interval):
                _flush(table_name)


if __name__ == '__main__':
    main()
Dockerfile
FROM digglerz/python3.8
# Copy only the dependency manifest first so the pip layer is cached and
# only rebuilt when requirements.txt itself changes (the original copied
# the whole source tree first, invalidating the cache on every edit).
COPY requirements.txt /app/requirements.txt
RUN pip install -i https://mirrors.aliyun.com/pypi/simple/ -r /app/requirements.txt --no-cache-dir --quiet
COPY . /app
WORKDIR /app
ENV PYTHONPATH=/app
# Build metadata passed in by CI and exposed to the app via settings.py.
ARG GIT_COMMIT
ARG GIT_COMMIT_SHORT
ARG GIT_LOCAL_BRANCH
ARG BRANCH_NAME
ARG DOCKER_IMAGE_NAME
ARG DOCKER_IMAGE_VERSION
ENV GIT_COMMIT=${GIT_COMMIT} \
GIT_COMMIT_SHORT=${GIT_COMMIT_SHORT} \
GIT_LOCAL_BRANCH=${GIT_LOCAL_BRANCH} \
BRANCH_NAME=${BRANCH_NAME} \
DOCKER_IMAGE_NAME=${DOCKER_IMAGE_NAME} \
DOCKER_IMAGE_VERSION=${DOCKER_IMAGE_VERSION}
CMD ["python","app/main.py"]
requirements.txt
# mysql-replication: provides pymysqlreplication.BinLogStreamReader used in main.py
# NOTE(review): versions are unpinned — consider pinning for reproducible builds.
mysql-replication
# starlette: only starlette.config.Config is used (settings.py) to read .env
starlette
构建Docker镜像
docker build -t jackyqs/mysql-binlog-client .
注意:命令行最后面的.代表是基于当前目录构建。