#!/usr/bin/env python
# coding:utf-8
__author__ = 'John'
import MySQLdb
import sys
import datetime
import time
class ArgsError(Exception):
def __init__(self):
print ( "\nUsage : %s 2012 02\n") % sys.argv[0]
class ClassMigrate(object):
"""
1、前提:
a.target_db需要有个模板表,target_tab将根据模板表创建
b.如果按月迁移,则需要输入年份和月份,对应self.year = sys.argv[1],self.month = sys.argv[2]参数
"""
def __init__(self):
#self.year = sys.argv[1]
self.year = '2016'
self.leap = int(self.year) % 4
self.year_length = len(self.year)
#self.month = sys.argv[2]
self.month = '03'
self.mon_length = len(self.month)
self.source_host = '192.168.1.41'
self.source_user = 'root'
self.source_password = 'xxx'
self.source_port = 53306
self.source_db = 'archive_db'
self.source_conn_str = MySQLdb.connect(host=self.source_host, port=self.source_port, user=self.source_user, passwd=self.source_password, db=self.source_db, charset='utf8')
self.source_conn_str.autocommit(True)
self.source_tab = self.source_db + '.t_hist_201603_innodb'
self.target_db = 'test'
self.template_tab = self.target_db + '.t_hist_template_archive'
self.target_tab = self.target_db + '.t_hist_' + self.year + self.month
""" 目标库配置信息只是在需要迁移到远程数据库的时候才需要配置,如果迁移到本地库,则不会读取目标库的配置 """
self.target_host = '192.168.1.220'
self.target_user = 'root'
self.target_password = 'xxx'
self.target_port = 53306
self.target_conn_str = MySQLdb.connect(host=self.target_host, port=self.target_port, user=self.target_user, passwd=self.target_password, db=self.target_db, charset='utf8')
self.target_conn_str.autocommit(True)
self.pk = 'auto_id'
self.date_col = 'ut'
self.step_size = 20000
"""_migState默认为False,不要更改。migMethodSelected用来选择迁移方式。
deleteMethodSelected用于选择源库删除方式,
!! 请谨慎选择delete方式,因为本程序会分批次删除数据并提交 !!
而'drop daily partition'则是针对每天一个分区表的,只输出drop partition语句,并不执行"""
self._migState = False
self.migMethod = ('monthly_2_local', 'monthly_2_remote', 'whole_2_local', 'whole_2_remote')
self.migMethodSelected = self.migMethod[3]
self.deleteMethod = ('delete', 'drop daily partition')
self.deleteMethodSelected = self.deleteMethod[1]
self.source_cnt = ''
self.source_min_id = ''
self.source_max_id = ''
def source_query(self, sql, sql_type):
try:
cr = self.source_conn_str.cursor()
cr.execute(sql)
if sql_type == 'select':
return cr.fetchall()
elif sql_type == 'dml':
rows = self.source_conn_str.affected_rows()
self.source_conn_str.commit()
return rows
else:
return True
except Exception, e:
print (str(e))
return False
finally:
cr.close()
def target_query(self, sql, sql_type, values=''):
try:
cr = self.target_conn_str.cursor()
if sql_type == 'select':
cr.execute(sql)
return cr.fetchall()
elif sql_type == 'insertmany':
cr.executemany(sql, values)
rows = self.target_conn_str.affected_rows()
self.target_conn_str.commit()
return rows
else:
cr.execute(sql)
return True
except Exception, e:
print (str(e))
return False
finally:
cr.close()
def get_day_end(self):
try:
if self.year_length != 4 or self.mon_length != 2:
raise ArgsError
if self.month in ('01', '03', '05', '07', '08', '10', '12'):
self.day_end = 31
else:
self.day_end = 30
if self.leap == 0:
if self.month == '02':
self.day_end = 29
else:
if self.month == '02':
self.day_end = 28
except Exception, e:
print (str(e))
def get_min_max(self, which_db, time_range_sql=''):
""" 无论哪种迁移方式,都会执行本函数创建目标、并获取源表需要迁移的总条数、最小id、最大id
:param which_db: 选择在哪个库执行create table SQL
:param time_range_sql: 附带的时间限制语句
"""
try:
dbs = {'source_query': self.source_query, 'target_query': self.target_query}
print ("\nStarting Migrate at -- %s") % (datetime.datetime.now().__str__())
sql = 'CREATE TABLE %s like %s' % (self.target_tab, self.template_tab)
if dbs[which_db](sql, 'ddl'):
pass
else:
raise Exception
sql = "select count(*),IFNULL(min(%s),-1),IFNULL(max(%s),-1) from %s" % (self.pk, self.pk,self.source_tab)
sql += time_range_sql
q = self.source_query(sql, 'select')
self.source_cnt = q[0][0]
self.source_min_id = q[0][1]
self.source_max_id = q[0][2]
return True
except Exception, e:
return False
print (str(e))
def migrate_2_local(self, time_range_sql=''):
try:
k = self.source_min_id
if self.source_cnt != 0 and self.source_min_id != None and self.source_max_id != None:
while k <= self.source_max_id:
sql = "insert into %s select id, outcomeFK, odds_providerFK, odds, odds_old, active, is_back, is_single, is_live, volume, currency, couponKey, del, n, ut, ADD_TIME, NULL from %s where %s>=%d and %s<%d " % (self.target_tab, self.source_tab, self.pk, k, self.pk, k+self.step_size)
sql += time_range_sql.replace('where', 'and')
print ("\n %s") % sql
starttime = datetime.datetime.now()
rows = self.source_query(sql, 'dml')
print ("Inserted %s rows") % str(rows)
endtime = datetime.datetime.now()
timeinterval = endtime - starttime
print("Elapsed :" + str(timeinterval.seconds) + '.' + str(timeinterval.microseconds) + " seconds")
k += self.step_size
except Exception, e:
print (str(e))
finally:
print ("\nInsert complete Migration at -- %s") % (datetime.datetime.now().__str__())
def migrate_2_remote(self, time_range_sql=''):
try:
k = self.source_min_id
if self.source_cnt != 0 and self.source_min_id != None and self.source_max_id != None:
while k <= self.source_max_id:
sql = "select id, outcomeFK, odds_providerFK, odds, odds_old, active, is_back, is_single, is_live, volume, currency, couponKey, del, n, ut, ADD_TIME, NULL from %s where %s>=%d and %s<%d " % (self.source_tab, self.pk, k, self.pk, k+self.step_size)
orderby = " order by %s" % self.pk
sql += time_range_sql.replace('where', 'and')
sql += orderby
print ("\n %s") % sql
starttime = datetime.datetime.now()
results = self.source_query(sql, 'select')
sql = "insert into " + self.target_tab + " values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
rows = self.target_query(sql, 'insertmany', results)
if rows == False:
print ("Insert failed!!")
else:
print ("Inserted %s rows.") % rows
endtime = datetime.datetime.now()
timeinterval = endtime - starttime
print("Elapsed :" + str(timeinterval.seconds) + '.' + str(timeinterval.microseconds) + " seconds")
k += self.step_size
except Exception, e:
print (str(e))
finally:
print ("\nInsert complete Migration at -- %s") % (datetime.datetime.now().__str__())
def delete_data(self, time_range_sql=''):
try:
k = self.source_min_id
if self.source_cnt != 0 and self.source_min_id != None and self.source_max_id != None:
while k <= self.source_max_id:
sql = "delete from %s where %s>=%d and %s<%d " % (self.source_tab, self.pk, k, self.pk, k+self.step_size)
sql += time_range_sql.replace('where', 'and')
print ("\n %s") % sql
starttime = datetime.datetime.now()
rows = self.source_query(sql, 'dml')
if rows == False:
print ("Delete failed!!")
else:
print ("Deleted %s rows.") % rows
endtime = datetime.datetime.now()
timeinterval = endtime - starttime
print("Elapsed :" + str(timeinterval.seconds) + '.' + str(timeinterval.microseconds) + " seconds")
time.sleep(1)
k += self.step_size
except Exception, e:
print (str(e))
finally:
print ("\nDelete complete Migration at -- %s") % (datetime.datetime.now().__str__())
def verify_total_cnt(self, which_db, time_range_sql=''):
try:
dbs = {'source_query': self.source_query, 'target_query': self.target_query}
sql = "select count(*) from %s " % (self.source_tab)
sql += time_range_sql
source_total_cnt = self.source_query(sql, 'select')
print ("\n")
print ("====="*5)
print ("source_total_cnt: %s") % source_total_cnt[0][0]
sql = "select count(*) from %s " % (self.target_tab)
sql += time_range_sql
target_total_cnt = dbs[which_db](sql, 'select')
print ("target_total_cnt: %s") % target_total_cnt[0][0]
print ("====="*5)
if source_total_cnt == target_total_cnt and source_total_cnt[0][0] != 0 and target_total_cnt[0][0] != 0:
print ("\nFinal result: Successfully!!")
self._migState = True
except Exception, e:
print (str(e))
def drop_daily_partition(self, which_db):
try:
dbs = {'source_query': self.source_query, 'target_query': self.target_query}
if self._migState:
for i in range(1, self.day_end+1):
day = str(i)
if i < 10:
day = '0' + str(i)
sql = "select count(*) from %s where %s >='%s-%s-%s 00:00:00' and %s <= '%s-%s-%s 23:59:59'" % (self.source_tab, self.date_col, self.year, self.month, day, self.date_col, self.year, self.month, day)
source_day_cnt = self.source_query(sql, 'select')
sql = "select count(*) from %s where %s >='%s-%s-%s 00:00:00' and %s <= '%s-%s-%s 23:59:59'" % (self.target_tab, self.date_col, self.year, self.month, day, self.date_col, self.year, self.month, day)
target_day_cnt = dbs[which_db](sql, 'select')
if source_day_cnt == target_day_cnt:
print ("alter table %s drop partition p%s%s%s;") % (self.source_tab, self.year, self.month, day)
else:
print("\nsource_day_cnt: %s") % source_day_cnt[0][0]
print("target_day_cnt: %s") % target_day_cnt[0][0]
break
except Exception, e:
print (str(e))
f = ClassMigrate()
if f.migMethodSelected in ('monthly_2_local', 'monthly_2_remote'):
f.get_day_end()
time_range_sql = " where %s >='%s-%s-01 00:00:00' and %s <= '%s-%s-%s 23:59:59'" % (f.date_col, f.year, f.month, f.date_col, f.year, f.month, f.day_end)
if f.migMethodSelected == 'monthly_2_local':
if f.get_min_max('source_query', time_range_sql):
f.migrate_2_local(time_range_sql)
f.verify_total_cnt('source_query', time_range_sql)
if f.deleteMethodSelected == 'delete':
f.delete_data(time_range_sql)
else:
f.drop_daily_partition('source_query')
else:
print ("Cannot get count, min_id, max_id! Exit.")
exit()
else:
if f.get_min_max('target_query', time_range_sql):
f.migrate_2_remote(time_range_sql)
f.verify_total_cnt('target_query', time_range_sql)
if f.deleteMethodSelected == 'delete':
f.delete_data(time_range_sql)
else:
f.drop_daily_partition('target_query')
else:
print ("Cannot get count, min_id, max_id! Exit.")
exit()
elif f.migMethodSelected in ('whole_2_local', 'whole_2_remote'):
if f.migMethodSelected == 'whole_2_local':
if f.get_min_max('source_query'):
f.migrate_2_local()
f.verify_total_cnt('source_query')
else:
print ("Cannot get count, min_id, max_id! Exit.")
exit()
else:
if f.get_min_max('target_query'):
f.migrate_2_remote()
f.verify_total_cnt('target_query')
else:
print ("Cannot get count, min_id, max_id! Exit.")
exit()
else:
print ("self.migMethodSelected must in ('monthly_2_local', 'monthly_2_remote', 'whole_2_local', 'whole_2_remote') !")