1.需求来源,有两个环境:生产,演示。需要将生产的指定表数据同步至演示环境
2.思考:
1.实时性要求不是特别高,但也不能太低
2.如果是全表同步,有的表数据量很大,可能同步效率低
3.通过比对数据差异,只同步增量数据
4.编写python脚本,通过crontab 配置定时调度
代码如下:
# -*- coding: UTF-8 -*-
import datetime
import requests
import pymysql
import json
#发送钉钉消息
# Send a plain-text notification to the DingTalk group robot.
def send_dingding_msg(msg):
    """Post *msg* as a text message to the module-level DingTalk webhook.

    Failures (non-200 response or network error) are reported on stdout
    instead of raising, so a broken notification channel never aborts
    the sync job itself.
    """
    headers = {"Content-Type": "application/json;charset=utf-8"}
    data = {
        "msgtype": "text",
        "text": {
            "content": msg
        }
    }
    try:
        # Timeout so a hung webhook cannot stall the whole cron run.
        r = requests.post(dingding_webhook_url, headers=headers, json=data, timeout=10)
        if r.status_code == 200:
            print("钉钉消息发送成功")
        else:
            print("钉钉消息发送失败")
    except Exception as e:
        # Best-effort notification: log and carry on.
        print("钉钉消息发送失败: " + repr(e))
# Webhook address of the DingTalk notification robot.
dingding_webhook_url = "https://oapi.dingtalk.com/robot/send?access_token=xxxxxxxxxxx"
# The daily-traffic data lives in monthly tables: the current
# year+month is appended to a shared base name on both sides.
now = datetime.datetime.now()
year = now.year
month = now.month
# NOTE(review): the month is NOT zero-padded here (January 2024 yields
# "...PRD_20241", not "...PRD_202401") — confirm this matches the real
# table naming scheme before changing anything.
suffix = "{}{}".format(year, month)
source_table = "CITY_OUTFLOW_TRAFFIC_DAILY_PRD_" + suffix
target_table = "CITY_OUTFLOW_TRAFFIC_DAILY_PRD_" + suffix
print("source_table==" + source_table)
print("target_table==" + target_table)
# Open both MySQL connections: production (read side) and demo (write side).
source_conn = pymysql.connect(
    host='数据库ip',
    user='prd',
    port=3306,
    password='密码',
    database='prd'
)
# BUG FIX: this second connection was previously assigned to
# source_conn as well, which overwrote the production connection and
# left target_conn undefined (every later target_conn use would raise
# NameError).
target_conn = pymysql.connect(
    host='数据库ip',
    user='pre',
    port=3306,
    password='密码',
    database='pre'
)
# Determine the highest id already present in the target table; only
# source rows with a larger id need to be copied (incremental sync).
results = None
targetCur = target_conn.cursor()
maxIdQuerysql = "select id from {} order by id desc limit 1".format(target_table)
try:
    targetCur.execute(maxIdQuerysql)
    results = targetCur.fetchall()
except Exception as e:
    targetCur.close()
    target_conn.close()
    # Reuse the shared notifier instead of rebuilding the payload here.
    # FIX: the original format string had only two placeholders for
    # three arguments, silently dropping the timestamp.
    send_dingding_msg("演示环境聚合数据同步:{}表数据异常{}{}".format(
        target_table, repr(e), now.strftime('%Y年%m月-%d日 %H:%M:%S')))
    # The target connection is closed; the original script kept running
    # and crashed later with an obscure error — stop the job cleanly.
    raise SystemExit(1)

# An empty target table yields no rows; in that case sync everything
# by starting from id 0.
maxId = 0
if results:
    for row in results:
        maxId = row[0]
# Copy rows with id > maxId from source to target in pages of pageSize
# rows, so very large tables never have to fit in memory at once.
page = 1
pageSize = 10000
start = 0
sourceCur = source_conn.cursor()
targetCur = target_conn.cursor()
totalCount = 0
# The INSERT statement is loop-invariant; build it once outside the loop.
insertSql = ("INSERT INTO {}(id,CITY,DELTA_OUT_TRAFFIC_SUM_DAY,DELTA_OUT_TRAFFIC_AVG_DAY,DAY)"
             " VALUES(%s,%s,%s,%s,%s)").format(target_table)
# Table names cannot be bound as SQL parameters, but maxId and the
# paging values are passed as real parameters rather than interpolated,
# avoiding injection/quoting issues.
sourceQuerySql = ("select id,CITY,DELTA_OUT_TRAFFIC_SUM_DAY,DELTA_OUT_TRAFFIC_AVG_DAY,DAY"
                  " from {} where id>%s order by id asc limit %s,%s").format(source_table)
while start >= 0:
    sourceResults = None
    start = (page - 1) * pageSize
    try:
        sourceCur.execute(sourceQuerySql, (maxId, start, pageSize))
        sourceResults = sourceCur.fetchall()
    except Exception as e:
        # FIX: two placeholders for three arguments in the original
        # message dropped the timestamp; also reuse the shared notifier.
        send_dingding_msg("演示环境聚合数据同步:{}表数据异常{}{}".format(
            target_table, repr(e), now.strftime('%Y年%m月-%d日 %H:%M:%S')))
    page += 1
    # FIX: when the SELECT failed, sourceResults stayed None and the
    # original crashed on len(None); treat that like "no more rows".
    if not sourceResults:
        start = -1
        sourceCur.close()
        source_conn.close()
    else:
        totalCount += len(sourceResults)
        print(insertSql)
        try:
            # Bulk-insert the whole page and commit it as one batch.
            targetCur.executemany(insertSql, sourceResults)
            target_conn.commit()
        except Exception as e:
            send_dingding_msg("演示环境聚合数据同步:{}表数据异常{}{}".format(
                target_table, repr(e), now.strftime('%Y年%m月-%d日 %H:%M:%S')))
# Final summary: only notify when at least one row was actually copied,
# so quiet runs do not spam the group chat. Uses the shared notifier
# instead of duplicating the webhook payload construction.
if totalCount > 0:
    send_dingding_msg("演示环境聚合数据同步:{}表数据同步成功{},同步记录共{}行".format(
        target_table, now.strftime('%Y年%m月-%d日 %H:%M:%S'), totalCount))
# Release the target-side resources (the source side is closed inside
# the sync loop once it runs out of rows).
targetCur.close()
target_conn.close()