从XML文件中获取相关数据,解决XML中部分数据缺少结束标签的问题

本文档提供了一种从XML文件中获取含“批量”关键字数据的解决方案,重点处理分光器(fgq)和PON光衰数据中XML内容未正常结束(缺少结束标签)的问题。先通过正则表达式匹配识别并统计错误数据,再对错误数据进行处理,将其转换为正确的XML格式;最后,提供了将修正后的数据写入新文件的方法。
摘要由CSDN通过智能技术生成
#!/usr/bin/env python
#encoding=utf-8

"""
@version:v1.0
@author:sxf
@software:PyCharm
@file:fgqgspl.py
@time:2017-08-05
"""

import  sys
import  re
import  os
import  codecs
from xml.dom import minidom
"""
# Extract the records that contain the keyword "批量" (batch) from the raw data
def get_batch_data(reader,writer):
    '''Copy every line of *reader* that contains "批量" into *writer*.

    Args:
        reader: Path of the input file; it is decoded as UTF-8.
        writer: Path of the output file. A file already present at this
            path is deleted first, so the output holds only this run.

    The output goes through the built-in open() without an explicit
    encoding, so it uses the platform default text encoding.
    '''
    pattern = re.compile(r'.*批量.*')
    # Context managers guarantee both handles are closed even when decoding
    # or writing fails part-way through the input.
    with codecs.open(reader,"r","utf-8") as input_file:
        # Drop the previous output only after the input opened successfully.
        if os.path.exists(writer):
            os.remove(writer)
        with open(writer,'a+') as write_file:
            for line in input_file:
                if pattern.match(line):
                    write_file.write(line)
"""
"""
# Helper: start every output file from scratch
def _open_fresh(path):
    '''Delete any existing file at *path*, then open it in "a+" mode.'''
    if os.path.exists(path):
        os.remove(path)
    return open(path,"a+")


# Separate the malformed batch-query records from the well-formed ones
def get_irregul_data(reader,writer_fgq,writer_fgq_no_onu,writer_pon,writer_pon_no_onu,writer_normal_pon_fgq):
    '''Sort the lines of *reader* into "normal" and "irregular" output files.

    Each line is split on '^' and its fourth field is inspected. A line
    that contains "分光器光衰批量查询" is counted as a splitter (fgq) record,
    every other line as a PON record.

    Args:
        reader: Path of the input file.
        writer_fgq: Receives fgq lines whose fourth field lacks "</root>".
        writer_fgq_no_onu: Receives fgq lines whose fourth field lacks "onu".
        writer_pon: Receives PON lines whose fourth field lacks "</root>".
        writer_pon_no_onu: Receives PON lines whose fourth field lacks "onu".
        writer_normal_pon_fgq: Receives every line (fgq or PON) whose fourth
            field contains "</root>".

    The "</root>" and "onu" checks are independent, so one line can be
    written to two files. Existing output files are deleted first. The six
    counters are printed once the whole input has been processed.

    Raises:
        IndexError: If a line has fewer than four '^'-separated fields.
    '''
    pattern = re.compile(r'.*分光器光衰批量查询.*')
    counter_names = (
        "fgq_no_end_count",   # fgq records without the closing root tag
        "fgq_no_onu_count",   # fgq records whose fourth field has no "onu"
        "fgq_normal_count",   # fgq records that are properly terminated
        "pon_no_end_count",   # PON records without the closing root tag
        "pon_no_onu_count",   # PON records whose fourth field has no "onu"
        "pon_normal_count",   # PON records that are properly terminated
    )
    counts = dict.fromkeys(counter_names, 0)

    opened = []
    try:
        opened.append(open(reader,"r"))
        # Outputs are created in the same order as before: fgq, pon,
        # fgq-no-onu, pon-no-onu, normal.
        for path in (writer_fgq,writer_pon,writer_fgq_no_onu,writer_pon_no_onu,writer_normal_pon_fgq):
            opened.append(_open_fresh(path))
        (input_file,write_fgq,write_pon,
         write_fgq_onu,write_pon_onu,write_normal_pon_fgq) = opened

        for line in input_file:
            payload = line.split('^')[3]
            if pattern.match(line):
                kind,no_end_file,no_onu_file = "fgq",write_fgq,write_fgq_onu
            else:
                kind,no_end_file,no_onu_file = "pon",write_pon,write_pon_onu

            if "</root>" in payload:
                counts[kind + "_normal_count"] += 1
                write_normal_pon_fgq.write(line)
            else:
                counts[kind + "_no_end_count"] += 1
                no_end_file.write(line)

            if "onu" not in payload:
                counts[kind + "_no_onu_count"] += 1
                no_onu_file.write(line)
    finally:
        # Close every handle that was opened, including the "normal" output
        # (previously never closed), even if a line turned out malformed.
        for handle in opened:
            handle.close()

    for name in counter_names:
        print(name + ":",counts[name])
"""
"""
#处理错误数据
def deal_irregular_data(reader_fgq_irreg,reader_pon_irreg,writer_toxml):
    fgq_input_file = open(reader_fgq_irreg,"r")
    pon_input_file = open(reader_pon_irreg,"r")
    if os.path.exists(writer_toxml):
        os.remove(writer_toxml)
    xml_output_file = open(writer_toxml,"a+")

    for line in fgq_input_file:
        str = line.split('^')
        last_char = line[len(line)-2]
        if (('a'<=last_char and last_char<='z') or ('A'<=last_char and last_char<='Z')):
            # 获取最后17个字符
            str_last_ten = l
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值