@ (python验证数据表的一致性)
用到的模块
pymysql --第三方 需要安装
configparser
背景
由于表之间没有使用外键,最近出现一些异常数据,需要把没有关联到的数据整理出来。设计思路如下:
- 使用配制文件读取DB连接信息;
- 创建配置表 记录主子表名和关联字段,另外记录当前执行的id,避免每次都要重新关联;
- 创建结果表记录异常数据;
config
$ cat config.cnf
[default]
host=192.68.4.40
user=username
password=password
db=db_name
port=3306
charset=utf8
建表语句
–config tablee
CREATE TABLE t_config_checkforeign_relation
(
id
int(11) NOT NULL AUTO_INCREMENT,
s_tabname
varchar(64) NOT NULL, --base table
s_colname
varchar(64) NOT NULL, --base colname
d_tabname
varchar(64) NOT NULL, --referrence table
d_colname
varchar(64) NOT NULL,
is_valid
smallint(6) DEFAULT ‘1’, – 1 valid
last_id
bigint(20) DEFAULT ‘0’, --last_id update every time
PRIMARY KEY (id
)
) ENGINE=InnoDB AUTO_INCREMENT=9 DEFAULT CHARSET=utf8
–result table
CREATE TABLE t_config_checkforeign_record
(
id
bigint(20) NOT NULL AUTO_INCREMENT,
d_tabname
varchar(64) DEFAULT NULL, --referrent tablename
d_column
varchar(64) DEFAULT NULL, --reference colname
d_columnval
varchar(64) DEFAULT NULL, --max value
last_id
bigint(20) DEFAULT ‘0’,
check_time
datetime DEFAULT NULL, --exec time
check_line
bigint(20) DEFAULT NULL, --influent line
PRIMARY KEY (id
)
) ENGINE=InnoDB
code
$ cat readconfig.py
import configparser
import os
class ReadConfig:
def __init__(self,filepath=None):
if filepath:
configpath = filepath
else:
root_dir = os.path.dirname(os.path.abspath('.'))
configpath = os.path.join(root_dir,'config.cnf')
print(root_dir)
self.cf = configparser.ConfigParser()
self.cf.read(configpath)
def get_db(self,param) -> str:
if self.cf.has_section('default'):
return self.cf.get('default',param)
else:
return self.cf.sections()
return '-1'
if __name__ == '__main__':
test=ReadConfig('config.cnf')
t = test.get_db('host')
print(t)
**$ cat check_foreign_val.py **
import pymysql
import time
import readconfig
import sys
class HandleMysql:
def __init__(self):
self.data = readconfig.ReadConfig('config.cnf')
#连接
def conn_mysql(self):
host = self.data.get_db('host')
port = int(self.data.get_db('port'))
user = self.data.get_db('user')
password = self.data.get_db('password')
db = self.data.get_db('db')
charset = self.data.get_db('charset')
self.conn = pymysql.connect(host=host,port=port,user=user,password=password,database=db,charset=charset)
self.cur = self.conn.cursor()
self.cur.execute('use `vipcoding-db`')
#关闭连接
def close_mysql(self):
self.cur.close()
self.conn.close()
#执行sql
def execute_sql(self,sql,data):
self.conn_mysql()
self.cur.execute(sql)
self.conn.commit()
def search(self,sql):
self.conn_mysql()
self.cur.execute(sql)
return self.cur.fetchall()
#查询一条数据
def searchone(self,sql):
self.conn_mysql()
self.cur.execute(sql)
return self.cur.fetchone()
def select(self,sql,table,column,condition=''):
condition = ' where '+condition if condition else None
if condition:
sql = sql.format(column,table,condition)
else:
sql = sql.format(column,table)
return self.search(sql)
#查询一条记录
def getIdFromConfigTable(sql:str) -> str:
sqlhandler = HandleMysql()
ret = sqlhandler.searchone(sql)
sqlhandler.close_mysql()
return ret;
#执行dml语句
def executesql(sql:str):
sqlhandler = HandleMysql()
sqlhandler.execute_sql(sql,'')
sqlhandler.close_mysql()
if __name__ == '__main__':
#需要判空 所以用元组接结果
(c_min,) = getIdFromConfigTable('select min(id) from t_config_checkforeign_relation ')
(c_max,) = getIdFromConfigTable('select max(id) from t_config_checkforeign_relation ')
if c_max is None:
print('Please set config table [t_config_checkforeign_relation] \n')
sys.exit(0)
#get min and max value for loop check data between tables
#print(c_max)
for p_id in range(c_min,c_max+1):
# s_tabname: basetable d_tabname:referrencetable record:t_config_checkforeign_record
sql ='select id, s_tabname,s_colname,d_tabname,d_colname,last_id from t_config_checkforeign_relation where is_valid=1 and id= ' +str(p_id)
configresult = getIdFromConfigTable(sql)
#print(configresult)
if configresult:
#get maxid from dest_table insert into record table
maxval_sql = 'select max(id) from '+ configresult[3]
(maxid_d,) = getIdFromConfigTable(maxval_sql)
current_time = time.strftime('%Y-%m-%d %H:%M:%S',time.localtime())
#insert diff result to record table
insert_sql = " insert into t_config_checkforeign_record(d_tabname,d_column,d_columnval,last_id,check_time,check_line) select '{}','{}',{},'{}','{}',count({}) from {} b left join {} a on b.{}=a.{} where b.{} > {} and a.id is null group by {}".format(configresult[3],configresult[4],configresult[4],maxid_d,current_time,configresult[4],configresult[3],configresult[1],configresult[4],configresult[2],configresult[2],configresult[5],configresult[4])
executesql(insert_sql)
#update config table max id
update_sql = 'update {} set {} = {} where {} = {}'.format('t_config_checkforeign_relation','last_id',maxid_d,'id',str(p_id))
executesql(update_sql)
else:
continue
print('select t_config_checkforeign_record check done!\n')
运行结果
存在问题
1.表之间如果存在相同的字段 需要加上别名,否则会报错。因为我们表中都是用某字段引用id 所以不存在这个问题。
2.配制文件代码中指定位置,如果想更改路径,请改代码。
3.没有Try catch 后续改进。
参考:https://www.cnblogs.com/zhujingzhi/p/9685892.html