利用 difflib 模块比较两个 XML 文件内容的差异,并将结果输出到 HTML

 需求说明:比较两个路径下所有相同文件名的XML文件内容,将差异行输出到日志和HTML中。

    XML路径:

XML内容:

 

     输出结果如图:

日志:

HTML:

  1. 获取两个路径下所有的XML文件名:
        def get_xml_name(self, path):
            """Return the sorted names of the regular files directly inside ``path``.

            Sub-directories are skipped: the returned names are meant to be opened
            as files, and a directory entry would make that open fail. The names
            are sorted so that the processing order does not depend on the
            arbitrary order in which ``os.listdir`` reports entries.

            Args:
                path: Directory to list.

            Returns:
                list: Bare file names (no directory part), sorted ascending.

            Raises:
                OSError: If ``path`` does not exist or cannot be listed.
            """
            return sorted(
                entry for entry in os.listdir(path)
                if os.path.isfile(os.path.join(path, entry))
            )

     

  2. 读取 XML文件:
        def read_xml_file(self, file_name):
            """Read a UTF-8 text file and return its lines without line endings.

            Args:
                file_name: Path of the file to read.

            Returns:
                list: One string per line, line terminators removed.

            Raises:
                SystemExit: With status 1, after logging the error, when the file
                    cannot be opened, read, or decoded as UTF-8.
            """
            try:
                # The context manager closes the handle even if read() fails.
                with open(file_name, 'r', encoding='UTF-8') as file_handle:
                    return file_handle.read().splitlines()
            except (IOError, UnicodeDecodeError) as error:
                self.logger.error('read_xml_file Error: {0}'.format(error))
                # Non-zero status so a failed run is not reported as success.
                sys.exit(1)

     

  3. 对比xml文件并输出HTML:
        def compare_file(self, file1_name, file2_name, html_name):
            """Write a side-by-side HTML diff of two files to ``html_name``.

            Both inputs are loaded through ``self.read_xml_file``. A failure while
            writing the HTML file is logged and otherwise ignored.

            Args:
                file1_name: Path of the file shown on the left side of the diff.
                file2_name: Path of the file shown on the right side of the diff.
                html_name: Path of the HTML file to create or overwrite.
            """
            left_lines = self.read_xml_file(file1_name)
            right_lines = self.read_xml_file(file2_name)
            # make_file() returns a complete HTML page containing the diff table.
            page = difflib.HtmlDiff().make_file(left_lines, right_lines)
            try:
                with open(html_name, 'w', encoding='UTF-8') as out:
                    out.write(page)
            except IOError as error:
                self.logger.error('write_compare_file error:{0}'.format(error))

     

  4. 完整代码:
    import os
    import sys
    import difflib
    import datetime
    from bs4 import BeautifulSoup
    from AnhuiMobile.script.checkauth.logModule import LogClass
    
    
    class ComparisonXMLFileDiff(LogClass):
        """Compare same-named XML files from two directories and report differing lines.

        For every file name present in both directories a ``difflib.HtmlDiff``
        page is written to a temporary HTML file; the table rows that carry a
        difference are extracted from it, logged, and collected into a single
        HTML report that is built from a template file.
        """

        # Substrings difflib.HtmlDiff puts on the markup of added / changed / deleted text.
        _DIFF_MARKERS = ('class="diff_add"', 'class="diff_chg"', 'class="diff_sub"')

        def __init__(self, origin_root_path="E:\\AnhuiMobile\\AnhuiMobile\\script\\checkauth\\dir1\\",
                     comparison_root_path="E:\\AnhuiMobile\\AnhuiMobile\\script\\checkauth\\dir2\\",
                     html_path="E:\\AnhuiMobile\\AnhuiMobile\\script\\checkauth\\htmlresult\\",
                     log_path="E:\\AnhuiMobile\\AnhuiMobile\\script\\checkauth\\logs\\",
                     base_html_path="E:\\AnhuiMobile\\AnhuiMobile\\script\\checkauth\\"):
            """Configure logging and the directories used by the comparison.

            Args:
                origin_root_path: Directory holding the original XML files.
                comparison_root_path: Directory holding the XML files to compare against.
                html_path: Directory that receives the generated HTML files.
                log_path: Directory passed to the rotating file log handler.
                base_html_path: Directory containing the report template
                    ``base_language_diff.html``.
            """
            LogClass.__init__(self, logName='ComparisonXMLFileDiff')
            # Log to a file ...
            self.setTimedRotatingFileHandler(fileName='ComparisonXMLFileDiff', path=log_path)
            # ... and to the console.
            self.setStreamHandler()
            # Directories that hold the XML files.
            self.origin_root_path = origin_root_path
            self.comparison_root_path = comparison_root_path
            # Directory for the generated HTML.
            self.html_path = html_path
            # Directory of the HTML template.
            self.base_html_path = base_html_path
            # Current date, used as a prefix of the generated file names.
            self.now = datetime.datetime.now().strftime("%Y-%m-%d")
            # Lines of the HTML template; diff rows are inserted into this list.
            self.base_html_list = []
            # Insertion index for the next diff row. The template is expected to
            # have its <tbody></tbody> at line 26.
            self.count = 26

        def get_xml_name(self, path):
            """Return the sorted names of the regular files directly inside ``path``.

            Sub-directories are skipped because every returned name is later
            opened as a file. Sorting makes the log and report order deterministic.

            Args:
                path: Directory to list.

            Returns:
                list: Bare file names (no directory part), sorted ascending.

            Raises:
                OSError: If ``path`` does not exist or cannot be listed.
            """
            return sorted(
                entry for entry in os.listdir(path)
                if os.path.isfile(os.path.join(path, entry))
            )

        def read_xml_file(self, file_name):
            """Read a UTF-8 text file and return its lines without line endings.

            Args:
                file_name: Path of the file to read.

            Returns:
                list: One string per line, line terminators removed.

            Raises:
                SystemExit: With status 1, after logging the error, when the file
                    cannot be opened, read, or decoded as UTF-8.
            """
            try:
                # The context manager closes the handle even if read() fails.
                with open(file_name, 'r', encoding='UTF-8') as file_handle:
                    return file_handle.read().splitlines()
            except (IOError, UnicodeDecodeError) as error:
                self.logger.error('read_xml_file Error: {0}'.format(error))
                # Non-zero status so a failed run is not reported as success.
                sys.exit(1)

        def compare_file(self, file1_name, file2_name, html_name):
            """Write a side-by-side HTML diff of two files to ``html_name``.

            A failure while writing the HTML file is logged and otherwise ignored.

            Args:
                file1_name: Path of the file shown on the left side of the diff.
                file2_name: Path of the file shown on the right side of the diff.
                html_name: Path of the HTML file to create or overwrite.
            """
            text1_lines = self.read_xml_file(file1_name)
            text2_lines = self.read_xml_file(file2_name)
            # make_file() returns a complete HTML page containing the diff table.
            result = difflib.HtmlDiff().make_file(text1_lines, text2_lines)
            try:
                with open(html_name, 'w', encoding='UTF-8') as f:
                    f.write(result)
            except IOError as error:
                self.logger.error('write_compare_file error:{0}'.format(error))

        def get_base_html(self):
            """Append the lines of the report template to ``self.base_html_list``.

            A failure to read the template is logged and otherwise ignored.
            """
            template = os.path.join(self.base_html_path, "base_language_diff.html")
            try:
                with open(template, "r", encoding="UTF-8") as f:
                    self.base_html_list.extend(f.readlines())
            except IOError as error:
                self.logger.error('get_base_html error:{0}'.format(error))

        def delete_same_from_html(self, html_name, xml_name1):
            """Keep only the differing rows of a diff page and merge them into the report.

            Reads the page written by ``compare_file``, logs the line number of
            every differing row, deletes the page, and - if at least one row
            differs - inserts a banner row with the file name followed by the
            differing rows into ``self.base_html_list`` at ``self.count``.

            Args:
                html_name: Path of the per-file diff page; it is removed afterwards.
                xml_name1: File name shown in the banner row and in the log.
            """
            # Banner row carrying the file name on both sides of the table. It is
            # built up front so it does not depend on the XML content (for example
            # on the presence of an XML declaration line).
            header_row = """<tr><td class="diff_next"></td><td class="diff_header"></td><td nowrap="nowrap"><span
                                   style="text-align: center;display:block;color:blue;font-size:20px;background-color:#999;">
                                   <strong>""" + xml_name1 + """</strong></span></td><td class="diff_next"></td><td 
                                   class="diff_header"></td><td nowrap="nowrap" style="text-align: center;display:block;
                                   color:blue;font-size:20px;background-color:#999;"><span ><strong>""" + xml_name1 \
                + """</strong></span></td></tr>"""
            diff_html_list = [header_row]
            has_diff = False
            try:
                with open(html_name, 'r', encoding="UTF-8") as f:
                    for line_html in f:
                        # Only rows of the diff table are of interest.
                        if '<tr><td class="diff_next"' not in line_html:
                            continue
                        if not any(marker in line_html for marker in self._DIFF_MARKERS):
                            continue
                        diff_html_list.append(line_html)
                        has_diff = True
                        soup = BeautifulSoup(line_html, 'html.parser')
                        # A row has one line-number cell per side; the left one is
                        # empty for added lines, so fall back to the right one.
                        numbers = [cell.get_text().strip()
                                   for cell in soup.find_all("td", attrs={'class': 'diff_header'})]
                        line = next((number for number in numbers if number), "")
                        self.logger.debug(xml_name1 + "][line: " + line + "]")
                os.remove(html_name)  # the per-file page is only an intermediate result
                if has_diff:
                    # Insert the rows as one slice and advance the insertion point.
                    self.base_html_list[self.count:self.count] = diff_html_list
                    self.count += len(diff_html_list)
            except Exception as e:
                # Best effort: a failure on one file must not abort the whole run.
                self.logger.error(e)

        def main(self):
            """Compare both directories and write the combined HTML report.

            Names that exist in only one directory are logged. Every name present
            in both is diffed and its differing rows are merged into the report,
            which is written to ``<html_path>/<date>_language_diff.html``.
            """
            origin_xml_name_list = self.get_xml_name(self.origin_root_path)
            comparison_xml_name_list = self.get_xml_name(self.comparison_root_path)
            origin_names = set(origin_xml_name_list)
            comparison_names = set(comparison_xml_name_list)

            # Files missing from the comparison directory.
            for xml_name1 in sorted(origin_names - comparison_names):
                self.logger.info(
                    xml_name1 + "][" + self.origin_root_path + " -> exist" + "][" + self.comparison_root_path + " -> not_exist")

            # Files missing from the origin directory.
            for xml_name2 in sorted(comparison_names - origin_names):
                self.logger.info(
                    xml_name2 + "][" + self.origin_root_path + " -> not_exist" + "][" + self.comparison_root_path + " -> exist")

            self.get_base_html()
            for xml_name in origin_xml_name_list:
                if xml_name not in comparison_names:
                    continue
                file_path1 = os.path.join(self.origin_root_path, xml_name)
                file_path2 = os.path.join(self.comparison_root_path, xml_name)
                html_name = os.path.join(
                    self.html_path, self.now + "-" + os.path.splitext(xml_name)[0] + '.html')
                self.compare_file(file_path1, file_path2, html_name)
                self.delete_same_from_html(html_name, xml_name)

            try:
                report_path = os.path.join(self.html_path, self.now + "_language_diff.html")
                with open(report_path, "w", encoding="UTF-8") as f_write:
                    f_write.write("".join(self.base_html_list))
            except IOError as error:
                self.logger.error('write_language_diff error:{0}'.format(error))
    
    
    if __name__ == '__main__':
        # Run the comparison with the default directories when executed as a script.
        ComparisonXMLFileDiff().main()
    
  • 1
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 3
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值