需求说明:比较两个路径下所有相同文件名的XML文件内容,将差异行输出到日志和HTML中。
XML路径:
XML内容:
输出结果如图:
日志:
HTML:
- 获取两个路径下所有的XML文件路径:
def get_xml_name(self, path):
    """Return the names of the XML files located directly inside ``path``.

    Only regular files whose name ends with ``.xml`` (case-insensitive) are
    returned.  Sub-directories and unrelated files are skipped so that they
    are never handed to the XML reader / differ later on.

    Args:
        path: Directory to scan (not recursive).

    Returns:
        A list of bare file names (no directory part), in the order reported
        by ``os.listdir``.

    Raises:
        OSError: If ``path`` does not exist or cannot be listed.
    """
    return [
        entry
        for entry in os.listdir(path)
        if entry.lower().endswith('.xml')
        and os.path.isfile(os.path.join(path, entry))
    ]
- 读取 XML文件:
def read_xml_file(self, file_name):
    """Read a UTF-8 text file and return its content as a list of lines.

    Args:
        file_name: Path of the file to read.

    Returns:
        The file content split with ``str.splitlines`` (line terminators
        removed).

    Raises:
        SystemExit: With exit status 1 when the file cannot be opened or
            read; the error is logged through ``self.logger`` first.
    """
    try:
        # The context manager closes the handle even if read() fails.
        with open(file_name, 'r', encoding='UTF-8') as file_handle:
            return file_handle.read().splitlines()
    except IOError as error:
        self.logger.error('read_xml_file Error: {0}'.format(error))
        # A bare sys.exit() reports success (status 0); signal the failure.
        sys.exit(1)
- 对比xml文件并输出HTML:
def compare_file(self, file1_name, file2_name, html_name):
    """Diff two XML files line by line and save the result as an HTML page.

    Both inputs are loaded through ``self.read_xml_file``; the side-by-side
    comparison is rendered by ``difflib.HtmlDiff.make_file`` and written to
    ``html_name`` as UTF-8.  A failure to write is logged, not raised.

    Args:
        file1_name: Path of the left-hand (original) file.
        file2_name: Path of the right-hand (comparison) file.
        html_name: Path of the HTML report to create.
    """
    left_lines = self.read_xml_file(file1_name)
    right_lines = self.read_xml_file(file2_name)
    # Full HTML document containing the side-by-side diff table.
    report = difflib.HtmlDiff().make_file(left_lines, right_lines)
    try:
        with open(html_name, 'w', encoding='UTF-8') as out:
            out.write(report)
    except IOError as error:
        self.logger.error('write_compare_file error:{0}'.format(error))
- 完整代码:
import os
import sys
import difflib
import datetime

from bs4 import BeautifulSoup

from AnhuiMobile.script.checkauth.logModule import LogClass


class ComparisonXMLFileDiff(LogClass):
    """Compare same-named XML files of two directories and report differences.

    For every XML file name present in both directories a ``difflib`` HTML
    diff is produced; only the rows that differ are kept.  Those rows are
    logged (file name + line number) and merged into one HTML report built
    from the ``base_language_diff.html`` template.
    """

    # Substrings that difflib puts into a table row holding a difference.
    _DIFF_MARKERS = ('class="diff_add"', 'class="diff_chg"', 'class="diff_sub"')

    def __init__(self,
                 origin_root_path="E:\\AnhuiMobile\\AnhuiMobile\\script\\checkauth\\dir1\\",
                 comparison_root_path="E:\\AnhuiMobile\\AnhuiMobile\\script\\checkauth\\dir2\\",
                 html_path="E:\\AnhuiMobile\\AnhuiMobile\\script\\checkauth\\htmlresult\\",
                 log_path="E:\\AnhuiMobile\\AnhuiMobile\\script\\checkauth\\logs\\",
                 base_html_path="E:\\AnhuiMobile\\AnhuiMobile\\script\\checkauth\\"):
        """Set up logging and remember the working directories.

        Args:
            origin_root_path: Directory holding the original XML files.
            comparison_root_path: Directory holding the XML files to compare.
            html_path: Directory that receives the generated HTML report.
            log_path: Directory passed to the rotating file log handler.
            base_html_path: Directory containing ``base_language_diff.html``.
        """
        super().__init__(logName='ComparisonXMLFileDiff')
        # File and console log handlers are configured by LogClass helpers
        # (NOTE(review): their exact behavior is defined outside this file).
        self.setTimedRotatingFileHandler(fileName='ComparisonXMLFileDiff', path=log_path)
        self.setStreamHandler()
        # Directories holding the XML files.
        self.origin_root_path = origin_root_path
        self.comparison_root_path = comparison_root_path
        # Directory of the generated HTML files.
        self.html_path = html_path
        # Directory of the HTML template.
        self.base_html_path = base_html_path
        # Current date, used as a prefix of the generated file names.
        self.now = datetime.datetime.now().strftime("%Y-%m-%d")
        # Lines of the HTML template; diff rows get inserted into this list.
        self.base_html_list = []
        # Index at which diff rows are inserted.  Assumes the <tbody></tbody>
        # of the template sits at line 26 - confirm against the template.
        self.count = 26

    def get_xml_name(self, path):
        """Return the names of the XML files located directly inside ``path``.

        Only regular files ending with ``.xml`` (case-insensitive) are
        returned, so sub-directories and unrelated files are never diffed.

        Raises:
            OSError: If ``path`` cannot be listed.
        """
        return [
            entry
            for entry in os.listdir(path)
            if entry.lower().endswith('.xml')
            and os.path.isfile(os.path.join(path, entry))
        ]

    def read_xml_file(self, file_name):
        """Read a UTF-8 file and return its lines without line terminators.

        Raises:
            SystemExit: With status 1 when the file cannot be read; the
                error is logged first.
        """
        try:
            # The context manager closes the handle even if read() fails.
            with open(file_name, 'r', encoding='UTF-8') as file_handle:
                return file_handle.read().splitlines()
        except IOError as error:
            self.logger.error('read_xml_file Error: {0}'.format(error))
            # A bare sys.exit() would report success (status 0).
            sys.exit(1)

    def compare_file(self, file1_name, file2_name, html_name):
        """Diff two XML files and write the full HTML comparison to ``html_name``.

        A failure to write the HTML file is logged, not raised.
        """
        text1_lines = self.read_xml_file(file1_name)
        text2_lines = self.read_xml_file(file2_name)
        # make_file renders a complete HTML page with the side-by-side diff.
        result = difflib.HtmlDiff().make_file(text1_lines, text2_lines)
        try:
            with open(html_name, 'w', encoding='UTF-8') as f:
                f.write(result)
        except IOError as error:
            self.logger.error('write_compare_file error:{0}'.format(error))

    def get_base_html(self):
        """Load the lines of the HTML template into ``self.base_html_list``.

        A failure to read the template is logged, not raised.
        """
        template = os.path.join(self.base_html_path, "base_language_diff.html")
        try:
            with open(template, "r", encoding="UTF-8") as f:
                self.base_html_list.extend(f.readlines())
        except IOError as error:
            self.logger.error('get_base_html error:{0}'.format(error))

    @staticmethod
    def _build_name_row(xml_name):
        """Return the HTML table row that shows ``xml_name`` as a heading."""
        return (
            '<tr><td class="diff_next"></td><td class="diff_header"></td>'
            '<td nowrap="nowrap"><span style="text-align: center;display:block;'
            'color:blue;font-size:20px;background-color:#999;"> <strong>'
            + xml_name +
            '</strong></span></td><td class="diff_next"></td>'
            '<td class="diff_header"></td><td nowrap="nowrap" '
            'style="text-align: center;display:block; color:blue;font-size:20px;'
            'background-color:#999;"><span ><strong>'
            + xml_name +
            '</strong></span></td></tr>'
        )

    @staticmethod
    def _extract_line_number(row_html):
        """Return the first non-empty line number found in a diff row.

        The left-hand number is preferred; for rows that exist only on the
        right-hand side (pure additions) the left cell is empty, so the
        right-hand number is used instead of logging an empty value.
        """
        soup = BeautifulSoup(row_html, 'html.parser')
        for cell in soup.find_all("td", attrs={'class': 'diff_header'}):
            number = cell.get_text().strip()
            if number:
                return number
        return ""

    def delete_same_from_html(self, html_name, xml_name1):
        """Keep only the differing rows of a diff page and log them.

        Reads the HTML page written by ``compare_file``, collects the rows
        marked as added / changed / deleted, logs the line number of each,
        deletes the page, and - if at least one difference was found -
        inserts a heading row plus the diff rows into the template list.

        Args:
            html_name: Path of the per-file HTML diff (removed afterwards).
            xml_name1: XML file name shown in the heading row and the log.
        """
        has_diff = False
        diff_html_list = []
        try:
            with open(html_name, 'r', encoding="UTF-8") as f:
                lines = f.readlines()
            for line_html in lines:
                if 'encoding="utf-8"' in line_html or 'encoding="UTF-8"' in line_html:
                    # The row holding the XML declaration is replaced by a
                    # heading row carrying the file name.
                    diff_html_list.append(self._build_name_row(xml_name1))
                elif ('<tr><td class="diff_next"' in line_html
                      and any(marker in line_html for marker in self._DIFF_MARKERS)):
                    diff_html_list.append(line_html)
                    has_diff = True
                    line = self._extract_line_number(line_html)
                    self.logger.debug(xml_name1 + "][line: " + line + "]")
            # The per-file page is only an intermediate artifact.
            os.remove(html_name)
            if has_diff:
                for diff_html in diff_html_list:
                    # Insert between <tbody> and </tbody> of the template.
                    self.base_html_list.insert(self.count, diff_html)
                    self.count += 1
        except Exception as e:
            # Best effort: a failure on one file must not abort the whole run.
            self.logger.error(e)

    def main(self):
        """Compare both directories and write the combined HTML report."""
        origin_xml_name_list = self.get_xml_name(self.origin_root_path)
        comparison_xml_name_list = self.get_xml_name(self.comparison_root_path)
        origin_names = set(origin_xml_name_list)
        comparison_names = set(comparison_xml_name_list)

        # Report files that exist on one side only (sorted for stable logs).
        for xml_name1 in sorted(origin_names - comparison_names):
            self.logger.info(
                xml_name1 + "][" + self.origin_root_path + " -> exist" + "]["
                + self.comparison_root_path + " -> not_exist")
        for xml_name2 in sorted(comparison_names - origin_names):
            self.logger.info(
                xml_name2 + "][" + self.origin_root_path + " -> not_exist" + "]["
                + self.comparison_root_path + " -> exist")

        self.get_base_html()
        for xml_name in origin_xml_name_list:
            if xml_name not in comparison_names:
                continue
            file_path1 = os.path.join(self.origin_root_path, xml_name)
            file_path2 = os.path.join(self.comparison_root_path, xml_name)
            # splitext keeps dotted stems intact ("a.b.xml" -> "a.b").
            stem = os.path.splitext(xml_name)[0]
            html_name = os.path.join(self.html_path, self.now + "-" + stem + '.html')
            self.compare_file(file_path1, file_path2, html_name)
            self.delete_same_from_html(html_name, xml_name)

        report_name = os.path.join(self.html_path, self.now + "_language_diff.html")
        try:
            with open(report_name, "w", encoding="UTF-8") as f_write:
                f_write.write("".join(self.base_html_list))
        except IOError as error:
            self.logger.error('write_language_diff error:{0}'.format(error))


if __name__ == '__main__':
    c = ComparisonXMLFileDiff()
    c.main()