Python3 实现金融文件比对(在实现简化版 FFReader 的基础上,增加比对功能)
需求
- 使用FFReader的config文件,解析传入文件的内容(包括Dictionary和TipDictionary内容)。
- 对比传入两文件除路径外的基本信息,基本信息包括[‘文件路径’, ‘发送方代码’, ‘发送方信息’, ‘接收方代码’, ‘接收方信息’, ‘使用的配置’, ‘文件类型’, ‘文件描述’, ‘文件传递日期’, ‘解析器配置’],不是全部文件都包含上述所有的基本信息。
- 将文件内容的列分组,分为[‘强控条件’, ‘普通条件’, ‘可忽略条件’],将传入的两个文件的数据行进行配对:强控条件若不同,一票否决;强控条件相同时,普通条件差异最少的两行配为一对;当其他条件完全相同时,再考虑可忽略条件。
- 提供类似Beyond Compare的界面显示比对结果(我对前端不感兴趣,这里使用HTML显示)。
代码
import os
import re
import time
import webbrowser
# Location of the FFReader configuration files (presumably a directory prefix
# that file names are appended to -- confirm at the use site). The raw literal
# ends with two backslash characters.
INI_PATH = r"F:\FFReader\config\\"

# Name of the OFD code-info configuration file.
OFD_CodeInfo = "OFD_CodeInfo.ini"

# Basic-info fields whose mismatch aborts the row-level comparison: every data
# row is then reported as unmatched.
BREAK_BASIC_CONDITION = ["文件类型"]

# Columns put into level 0 ("strict"): two rows that differ in any of these
# columns are never paired.
PRIORITY_CONDITION_LIST = []

# Columns put into level 2 ("ignorable"): their differences only break ties
# between candidate pairs. Every column in neither list falls into level 1.
# NOTE(review): entries such as "*流水号*" look like wildcard patterns; the
# matching is done by is_in_condition, which is not visible here -- confirm.
IGNORE_CONDITION_LIST = [
    "序号",
    "*流水号*",
    "交易发生时间",
]
class interface_difference:
interface_info1 = interface_info2 = None
basic_msg_diff = {
}
col_names_level = [[],[],[]]
data_diff = {
}
data_no_match = [[], []]
col_map = [[[], [], []], [[], [], []]]
__is_init_diff_data__ = False
def __init__(self, id1, id2):
    """Bind the two parsed files to compare and reset the result containers.

    Args:
        id1: Parsed first file; the comparison reads its ``basic_msg_dict``,
            ``col_names`` and ``data_dict`` attributes.
        id2: Parsed second file, exposing the same attributes as ``id1``.
    """
    self.interface_info1, self.interface_info2 = id1, id2
    # The class-level defaults are mutable objects, and the comparison
    # mutates them in place through ``self`` (for example
    # ``self.basic_msg_diff[k] = ...`` and appends into ``self.col_map``).
    # Without per-instance copies every comparison object would share, and
    # keep accumulating into, the same dicts and lists.
    self.basic_msg_diff = {}
    self.col_names_level = [[], [], []]
    self.data_diff = {}
    self.data_no_match = [[], []]
    self.col_map = [[[], [], []], [[], [], []]]
    self.__is_init_diff_data__ = False
def __init_diff_data__(self):
if self.__is_init_diff_data__:
return
id1, id2 = self.interface_info1, self.interface_info2
if id1.basic_msg_dict['文件路径'].split('\\')[-1] != id2.basic_msg_dict['文件路径'].split('\\')[-1]:
self.basic_msg_diff['文件路径'] = [id1.basic_msg_dict['文件路径'].split('\\')[-1],
id1.basic_msg_dict['文件路径'].split('\\')[-1]]
for k in list(id1.basic_msg_dict.keys())[1:]:
if id1.basic_msg_dict[k] != id2.basic_msg_dict[k]:
self.basic_msg_diff[k] = [id1.basic_msg_dict[k], id2.basic_msg_dict[k]]
if k in BREAK_BASIC_CONDITION:
self.data_no_match = [[l for l in range(len(id1.data_dict))], [l for l in range(len(id1.data_dict))]]
return
col_names = set(id1.col_names)
col_names.update(id2.col_names)
col_names = list(col_names)
col_names_level = [[], [], []]
self.col_names_level = col_names_level
id1_data, id2_data = [[[] for i in range(3)] for j in range(len(id1.data_dict))], \
[[[] for i in range(3)] for j in range(len(id2.data_dict))]
def append_id_date(n, idn, id_data, col_map):
cns = idn.col_names
if c in cns:
col_index = cns.index(c)
col_map[n-1][c_flag].append(col_index)
for i_d_i in range(len(idn.data_dict)):
id_data[i_d_i][c_flag].append(idn.data_dict[i_d_i][col_index])
else:
col_map[n - 1][c_flag].append(None)
for i_d_i in range(len(idn.data_dict)):
id_data[i_d_i][c_flag].append(None)
for c in col_names:
c_flag = 0 if is_in_condition(PRIORITY_CONDITION_LIST, c) else \
(2 if is_in_condition(IGNORE_CONDITION_LIST, c) else 1)
col_names_level[c_flag].append(c)
append_id_date(1, id1, id1_data, self.col_map)
append_id_date(2, id2, id2_data, self.col_map)
matching_col_matrix = [[[[] for k in range(3)] for j in range(len(id2_data))] for i in
range(len(id1_data))]
for d_i in range(len(id1_data)):
for d_j in range(len(id2_data)):
for l in range(3):
for c_i in range(len(col_names_level[l])):
if id1_data[d_i][l][c_i] != id2_data[d_j][l][c_i]:
matching_col_matrix[d_i][d_j][l].append(c_i)
match_degree = []
for i in range(len(id1_data)):
for j in range(len(id2_data)):
if matching_col_matrix[i][j][0]:
pass
elif matching_col_matrix[i][j] is not None:
match_degree.append([(len(matching_col_matrix[i][j][1]), len(matching_col_matrix[i][j][2])), i, j])
match_degree.sort()
match_results = [[], []]
not_match_lines = [[i for i in range(len(id1_data))], [i for i in range(len(id2_data))]]
for i in match_degree:
if i[1] in match_results[0