python实现MySQL指定表增量同步数据到clickhouse

16 篇文章 0 订阅

python实现MySQL指定表增量同步数据到clickhouse,脚本如下:

#!/usr/bin/env python3
# _*_ coding:utf8 _*_

from pymysqlreplication import BinLogStreamReader
from pymysqlreplication.row_event import (DeleteRowsEvent,UpdateRowsEvent,WriteRowsEvent,)
import clickhouse_driver
import configparser
import os
import re

configfile='repl.ini'
##########  配置文件repl.ini 操作 ##################
def create_configfile(configfile,log_file,log_pos):
    """Create the replication checkpoint file if it does not exist yet.

    Writes a ``[replinfo]`` section holding ``log_file`` and ``log_pos``
    (the position is stored as a string). An already existing file is left
    untouched.

    Args:
        configfile: path of the ini file to create.
        log_file: binlog file name to record.
        log_pos: binlog position to record.
    """
    if os.path.exists(configfile):
        return

    parser = configparser.ConfigParser()
    parser['replinfo'] = {'log_file': log_file, 'log_pos': str(log_pos)}
    with open(configfile, 'w+') as handle:
        parser.write(handle)

### repl.ini 写操作 ##################
def write_config(configfile,log_file,log_pos):
    """Persist the current binlog coordinates to the checkpoint file.

    Existing content of the file is preserved; only ``log_file`` and
    ``log_pos`` in the ``[replinfo]`` section are replaced. If the file or
    the section does not exist yet, it is created.

    Args:
        configfile: path of the ini file.
        log_file: binlog file name to record.
        log_pos: binlog position to record (stored as a string).
    """
    config = configparser.ConfigParser()
    # read() silently skips a missing file and leaves the parser empty.
    config.read(configfile)

    # Without this, set() raises NoSectionError on a fresh/missing file.
    if not config.has_section('replinfo'):
        config.add_section('replinfo')

    config.set('replinfo', 'log_file', log_file)
    config.set('replinfo', 'log_pos', str(log_pos))

    with open(configfile, 'w') as f:
        config.write(f)

### 配置文件repl.ini 读操作 ##################
def read_config(configfile):
    """Load the saved binlog coordinates from the checkpoint file.

    Args:
        configfile: path of the ini file containing a ``[replinfo]`` section.

    Returns:
        A ``(log_file, log_pos)`` tuple; ``log_pos`` is converted to ``int``.

    Raises:
        KeyError: if the ``replinfo`` section or one of its keys is absent
            (which includes the case of a missing file).
    """
    parser = configparser.ConfigParser()
    parser.read(configfile)

    section = parser['replinfo']
    binlog_name = section['log_file']
    binlog_offset = int(section['log_pos'])
    return (binlog_name, binlog_offset)

############# clickhouse  操作 ##################
def ops_clickhouse(db,table,sql):
    """Execute one SQL statement against the local ClickHouse server.

    Args:
        db: schema name. Not used by this function; kept so existing calls
            keep working.
        table: table name. Not used either, for the same reason.
        sql: statement text. Every occurrence of ``= None`` is rewritten to
            ``= NULL`` before execution, and the final text is printed.

    Raises:
        SystemExit: with status 1 when connecting or executing fails; the
            error is printed first.
    """
    client = None
    try:
        client = clickhouse_driver.Client(host='127.0.0.1',
                                          port=9000,
                                          user='default',
                                          password='clickhouse')
        # Python's None rendered into a comparison must become SQL NULL.
        sql = sql.replace('= None', '= NULL')
        print(sql)
        client.execute(sql)

    except Exception as error:
        # The previous message was copied from the column-type lookup and
        # wrongly reported a "fetch column type" failure.
        message = "执行clickhouse SQL错误. %s" % (error)
        print(message)
        # Same effect as exit(1) without relying on the site-provided helper.
        raise SystemExit(1)
    finally:
        # A new client is created per call, so release its connection.
        if client is not None:
            client.disconnect()

# 类型转换
def _clickhouse_quote(value):
    """Return ``value`` as a single-quoted, escaped ClickHouse string literal."""
    text = str(value).replace('\\', '\\\\').replace("'", "\\'")
    return "'" + text + "'"


def mysql_clickhouse_type_map(db,table,column_name,value):
    """Wrap a value in the ClickHouse conversion function for its column.

    The column type is looked up in ``system.columns`` on the local
    ClickHouse server and mapped to a conversion function, producing text
    such as ``toInt32('12')`` that can be embedded in a SQL statement.

    Args:
        db: ClickHouse database name.
        table: ClickHouse table name.
        column_name: column whose type decides the conversion function.
        value: value to render; ``None`` is rendered as ``NULL`` without
            contacting the server.

    Returns:
        The SQL expression as a string.

    Raises:
        SystemExit: with status 1 when the lookup fails, the column is not
            found, or its type has no entry in the mapping.
    """
    # Only these column types are mapped so far; unsigned integers, floats,
    # dates, date-times and decimals are not handled yet.
    type_map={'Int8':'toInt8',
              'Int16':'toInt16',
              'Int32':'toInt32',
              'Int64':'toInt64',
              'Nullable(Int32)':'toInt32OrNull',
              'Nullable(String)':'toString'
              }

    # str(None) would otherwise be sent as the literal text 'None'.
    if value is None:
        return 'NULL'

    client = None
    try:
        client = clickhouse_driver.Client(host='127.0.0.1',
                                          port=9000,
                                          user='default',
                                          password='clickhouse')
        sql = ("select type from system.columns "
               "where database = %(db)s and table = %(table)s and name = %(name)s")
        # Query once and reuse the result (it used to be executed twice).
        rows = client.execute(sql, {'db': db, 'table': table, 'name': column_name})
        if not rows:
            raise LookupError("column %s.%s.%s not found" % (db, table, column_name))

        column_type = rows[0][0]
        if column_type not in type_map:
            raise LookupError("unsupported column type %s for %s" % (column_type, column_name))

        # Escape quotes/backslashes so the value cannot break the statement.
        expression = type_map[column_type] + '(' + _clickhouse_quote(value) + ')'
        print(expression)
        return expression

    except Exception as error:
        message = "获取clickhouse里面的字段类型错误. %s" % (error)
        print(message)
        # Same effect as exit(1) without relying on the site-provided helper.
        raise SystemExit(1)
    finally:
        # A new client is created per call, so release its connection.
        if client is not None:
            client.disconnect()




# Connection parameters handed to BinLogStreamReader for the source MySQL server.
# NOTE(review): credentials are hard-coded here — consider loading them from
# configuration or the environment.
MYSQL_SETTINGS = {
    'host': '127.0.0.1',
    'port': 13306,
    'user': 'root',
    'passwd': 'Root@0101',
}
# Row-level binlog event classes the stream is restricted to.
only_events = (
    DeleteRowsEvent,
    WriteRowsEvent,
    UpdateRowsEvent,
)
def _to_clickhouse_value(binlogevent, column, value):
    """Render one column value as a ClickHouse SQL expression (NULL for None)."""
    if value is None:
        return 'NULL'
    return mysql_clickhouse_type_map(binlogevent.schema, binlogevent.table, column, value)


def _delete_sql(binlogevent, row):
    """Build the ALTER TABLE ... DELETE mutation for one deleted row."""
    # NOTE(review): assumes a single-column primary key — confirm for
    # tables with composite keys.
    pk = binlogevent.primary_key
    key_value = mysql_clickhouse_type_map(binlogevent.schema, binlogevent.table, pk, row["values"][pk])
    return "ALTER TABLE `%s`.`%s` DELETE WHERE %s = %s ;" % (
        binlogevent.schema, binlogevent.table, pk, key_value)


def _update_sql(binlogevent, row):
    """Build the ALTER TABLE ... UPDATE mutation for one updated row.

    Returns None when no column value differs between the before and after
    images, since there is nothing to change in that case.
    """
    before = row["before_values"]
    after = row["after_values"]
    pk = binlogevent.primary_key

    # Only changed columns go into the SET list. This keeps an unchanged key
    # column out of the mutation (ClickHouse refuses to update key columns).
    assignments = []
    for column, value in after.items():
        if column in before and before[column] == value:
            continue
        assignments.append("%s = %s" % (column, _to_clickhouse_value(binlogevent, column, value)))

    if not assignments:
        return None

    # Use the typed key expression, exactly as the DELETE statement does.
    key_value = mysql_clickhouse_type_map(binlogevent.schema, binlogevent.table, pk, before[pk])
    return "ALTER TABLE %s.%s UPDATE  %s WHERE %s = %s ;" % (
        binlogevent.schema, binlogevent.table, ', '.join(assignments), pk, key_value)


def _insert_sql(binlogevent, row):
    """Build the INSERT statement for one inserted row."""
    values = row["values"]
    rendered = [_to_clickhouse_value(binlogevent, column, value)
                for column, value in values.items()]
    return "INSERT INTO %s.%s(%s) VALUES (%s );" % (
        binlogevent.schema, binlogevent.table, ','.join(values.keys()), ','.join(rendered))


def main():
    """Replay MySQL row events of ``test.t_repl`` into ClickHouse.

    Resumes from the binlog coordinates stored in the checkpoint file, turns
    every delete/update/insert row event into a ClickHouse statement,
    executes it, and records the new coordinates after each event. The
    stream is blocking, so this runs until an error occurs or the process is
    stopped.
    """
    # Resume from the coordinates saved by the previous run.
    (log_file, log_pos) = read_config(configfile)
    print('-----------------------------------------------------------------------------')
    stream = BinLogStreamReader(connection_settings=MYSQL_SETTINGS, resume_stream=True, blocking=True,
                                server_id=10,
                                # Lists, not bare strings: a string would be
                                # matched by substring containment.
                                only_tables=['t_repl'], only_schemas=['test'],
                                log_file=log_file, log_pos=log_pos,
                                only_events=only_events,
                                fail_on_table_metadata_unavailable=True, slave_heartbeat=10)

    try:
        for binlogevent in stream:
            for row in binlogevent.rows:
                if isinstance(binlogevent, DeleteRowsEvent):
                    sql = _delete_sql(binlogevent, row)
                elif isinstance(binlogevent, UpdateRowsEvent):
                    sql = _update_sql(binlogevent, row)
                elif isinstance(binlogevent, WriteRowsEvent):
                    sql = _insert_sql(binlogevent, row)
                else:
                    continue

                if sql is None:
                    continue
                ops_clickhouse(binlogevent.schema, binlogevent.table, sql)

            # Save the position only once the whole event has been applied:
            # saving after each row could skip the event's remaining rows
            # after a crash and restart.
            write_config(configfile, stream.log_file, stream.log_pos)

    except Exception as e:
        # SystemExit raised by the ClickHouse helpers is not an Exception
        # subclass and therefore still terminates the process.
        print(e)
    finally:
        stream.close()

# Start the synchronisation loop only when run as a script, not on import.
if __name__ == "__main__":
    main()
    # Manual check of the type mapping:
    # mysql_clickhouse_type_map('test','t_repl','c',12)



'''
BinLogStreamReader()参数
ctl_connection_settings:保存表结构(schema)信息的集群的连接设置
resume_stream:从位置或binlog的最新事件或旧的可用事件开始
log_file:设置复制开始日志文件
log_pos:设置复制开始日志pos(resume_stream应该为true)
auto_position:使用master_auto_position gtid设置位置
blocking:以阻塞方式读取流(没有新事件时等待,而不是结束)

only_events:允许的事件数组
ignored_events:被忽略的事件数组

only_tables:包含要观看的表的数组(仅适用于binlog_format ROW)
ignored_tables:包含要跳过的表的数组

only_schemas:包含要观看的模式的数组
ignored_schemas:包含要跳过的模式的数组

freeze_schema:如果为true,则不支持ALTER TABLE。速度更快。
skip_to_timestamp:在达到指定的时间戳之前忽略所有事件。
report_slave:在SHOW SLAVE HOSTS中上报本从库(slave)的信息。
slave_uuid:在SHOW SLAVE HOSTS中报告slave_uuid。
fail_on_table_metadata_unavailable:如果我们无法获取有关row_events的表信息,应该引发异常
slave_heartbeat:(秒)主库是否在连接上主动发送心跳及其间隔。在GTID复制恢复、binlog中有大量事件需要跳过的情况下,这也能减少流量。语义请参阅MySQL文档中的MASTER_HEARTBEAT_PERIOD
'''

  • 0
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值