Python：使用 xml.etree.ElementTree 解析 XML 文件

最新推荐文章于 2021-06-09 15:04:40 发布

Tanya_girl

最新推荐文章于 2021-06-09 15:04:40 发布

阅读量579

点赞数

本文链接：https://blog.csdn.net/Tanya_girl/article/details/78054058

版权

python 专栏收录该内容

9 篇文章 0 订阅

订阅专栏

直接上代码，请看注释。附件图片为原始xml文件的部分展示。

import os
import gzip
import xml.etree.ElementTree as ET



def getfilename(path, l=''):
    """Recursively collect the files under *path* whose name contains *l*.

    Args:
        path: Root directory to search; every sub-directory is walked too.
        l: Substring a file name must contain to be selected. It is coerced
            with ``str()``; the default ``''`` matches every file.

    Returns:
        A non-empty list of matching file paths (walked directory joined
        with the file name), or the message string
        ``u'未找到符合条件的文件'`` when no file matches.
    """
    pattern = str(l)
    file = []  # matching file paths, in os.walk order
    for dirpath, _dirnames, filenames in os.walk(path):
        # os.path.join instead of a hard-coded '\\' so the resulting paths
        # are valid on every platform, not only on Windows.
        file.extend(
            os.path.join(dirpath, name)
            for name in filenames
            if pattern in name
        )
    if not file:
        # The message used to be assigned but never returned, which made the
        # function return None; return it so callers can detect "not found"
        # by checking for a string.
        return u'未找到符合条件的文件'
    return file


if __name__ == '__main__':
    # Truncate both log files so every run starts from a clean slate.
    # (The error log used to be opened in append mode, which never cleared it.)
    with open('res.txt', 'w'):
        pass
    with open('res-err.txt', 'w'):
        pass

    # Folder that holds the gzip-compressed XML files to parse.
    path = r'D:\BaiduNetdiskDownload\0730\total'
    # Only files whose name contains this substring are processed; passing
    # l='' would select every file under `path`.
    files = getfilename(path, l='_MRS_NSN_OMC_')

    # getfilename reports "nothing found" with a non-list value (a message
    # string, or None in its original form). `type(x) == 'str'` is always
    # False, so check with isinstance and also cover the empty/None case.
    if not files or isinstance(files, str):
        print(u'未找到符合条件的文件')
    else:
        wanted = 'MR.RSRP'
        # Open the result file once for the whole run instead of re-opening
        # it for every extracted value.
        with open('res.txt', 'a') as g:
            for xml_path in files:
                try:
                    with gzip.open(xml_path) as f:
                        root = ET.parse(f).getroot()
                    # Per the original author's note the root element is
                    # bulkPmMrDataFile, containing fileHeader and eNB nodes.
                    for enb in root.iter('eNB'):
                        enb_id = enb.attrib.get('id') or ''
                        for measurement in enb.iter('measurement'):
                            # A measurement without mrName must not abort the
                            # whole file, so fall back to an empty name.
                            mr_name = measurement.attrib.get('mrName') or ''
                            if wanted not in mr_name:
                                continue
                            for obj in measurement.iter('object'):
                                for v in obj.findall('v'):
                                    # The text of each <v> is the value to
                                    # extract; an empty <v/> yields ''.
                                    g.write(enb_id + ' ' + (v.text or '') + '\n')
                            # Only the first matching measurement of each eNB
                            # is exported.
                            break
                except Exception as e:
                    # Best-effort batch job: record the failure and continue
                    # with the next file. str(e) is required because an
                    # exception object cannot be concatenated with a string.
                    with open('res-err.txt', 'a') as r:
                        r.write(xml_path + ': ' + str(e) + '\n')