常用的三个模块:
1、正则匹配 re;
2、配置文件解析 configparser;
3、excel 持久化 xlsxwriter;
做个集锦备忘,同时实现了简单的表名规则引擎。
参考文档:
https://xlsxwriter.readthedocs.io/contents.html
import configparser
import re
import xlsxwriter
import time
# 命名规则匹配类
class MyRule:
_rule_dict = None
# 规则由两个维度 bk/fc 进行索引
def __init__(self, bk_list, fc_list):
self._rule_dict = dict()
for bk in bk_list:
self._rule_dict[bk] = dict()
for fc in fc_list:
self._rule_dict[bk][fc] = list()
# 注册规则,支持模糊注册
def reg(self, rule, bk=None, fc=None):
if bk is None and fc is not None:
for _bk in self._rule_dict.keys():
self._rule_dict[_bk][fc].append(rule)
elif bk is not None and fc is None:
for _fc in self._rule_dict[bk].keys():
self._rule_dict[bk][_fc].append(rule)
elif bk is None and fc is None:
for _bk in self._rule_dict.keys():
for _fc in self._rule_dict[_bk].keys():
self._rule_dict[_bk][_fc].append(rule)
elif bk is not None and fc is not None:
self._rule_dict[bk][fc].append(rule)
# 正则表达式,规则匹配
def match(self, bk, fc, s):
for r in self._rule_dict[bk][fc]:
res = re.fullmatch(r, s)
if res is None:
return False, r
elif res.string != s:
return False, r
return True, None
# excel 持久化类
class MyXlsx:
_workbook = None
def __init__(self, file_name):
self._workbook = xlsxwriter.Workbook(file_name) # 创建新的 xlsx 文件,如果文件已存在,内容会被清空
def write_xlsx(self, sheet_name, title, data):
worksheet = self._workbook.add_worksheet(sheet_name) # 添加新的 sheet
worksheet.write_row('A1', title) # 添加标题列
i = 1
for row in data:
worksheet.write_row(i, 0, row) # 写入一行
i += 1
def close(self):
self._workbook.close()
def main():
cf = configparser.ConfigParser()
cf.read('table_name_std.ini') # 读取配置文件
bk_list = cf.sections() # 读取分区标识,即: [xxxx] 的内容
res_xls = MyXlsx(time.strftime('命名规范%Y%m%d.xlsx'))
title = ('总表数', '英文表名异常数量', '英文表名异常比例')
rule_en = MyRule(bk_list, fc_pro) # 英文表名规则
# 英文名:后缀规则
rule_en.reg('.*_(' + '|'.join(pt_sign) + ')$')
result_total_data = []
result_detail_data = {}
for bk in bk_list:
fc_list = cf.options(bk) # 分层列表
table_cnt = table_en_fail_cnt = table_ch_fail_cnt = 0
for fc in fc_list:
project_name = cf.get(bk, fc)
tables = list_tables(project=project_name)
for table in tables:
res_en, r_en = rule_en.match(bk, fc, table.name)
res_ch, r_ch = rule_ch.match(bk, fc, table.comment)
table_en_fail_cnt += 0 if res_en else 1
table_ch_fail_cnt += 0 if res_ch else 1
table_cnt += 1
print(bk, table_cnt, table_en_fail_cnt, table_ch_fail_cnt)
result_total_data.append((bk, table_cnt,
table_en_fail_cnt, '%.2f%%' % (table_en_fail_cnt/table_cnt),
table_ch_fail_cnt, '%.2f%%' % (table_ch_fail_cnt/table_cnt)))
res_xls.write_xlsx('概括', title, result_total_data)
res_xls.close()
if __name__ == '__main__':
main()