使用 Python 解析 XML
Python 标准库自带了几种 XML 解析方法:xml.parsers.expat、xml.dom、xml.etree.ElementTree、xml.sax 等。
这里介绍 xml.dom(示例中使用的是 xml.dom.minidom)和 xml.etree.ElementTree。
示例文件:
<?xml version="1.0" encoding="utf-8"?>
<parent id="top">
<!--中文注释-->
<child1 name="paul">Text goes here</child1>
<child2 name="fred">More text</child2>
<child3 name="小明" age="12"> 中文说明 </child3>
</parent>
ElementTree解析
#!/usr/bin/python
# -*- coding: utf-8 -*-
#python file
#Filename:xtree.py
# etree 解析,注释会丢失
import xml.etree.ElementTree as xparser
import os
def FindNodeAndSetAttr(xml, nodeName, attrMap):
    """Overwrite existing attributes on the first element matching ``nodeName``.

    The current value of every attribute named in ``attrMap`` is printed
    first, then each ``key--value`` pair is printed and applied.

    Args:
        xml: Object exposing ``find()`` (for example an ``ElementTree``
            returned by ``xml.etree.ElementTree.parse``).
        nodeName: Tag name or path handed to ``xml.find()``.
        attrMap: Mapping of attribute name to the new value.

    Raises:
        LookupError: If ``xml.find(nodeName)`` finds no element.
        KeyError: If the element lacks one of the attributes in
            ``attrMap``; raised before any attribute is modified.
    """
    ele = xml.find(nodeName)
    # Compare against None explicitly: an Element without children is falsy,
    # so a plain truthiness test would reject valid leaf elements.
    if ele is None:
        raise LookupError("no element matching %r was found" % (nodeName,))
    atts = ele.attrib
    # Indexing (not .get) is deliberate: only attributes that already exist
    # may be updated, and a missing one aborts before any change is made.
    for k in attrMap:
        print(atts[k])
    for k, v in attrMap.items():
        print("%s--%s" % (k, v))
        ele.set(k, v)
# Show the working directory, since the relative paths below resolve against it.
print("current dir is %s" % os.getcwd())
# Attribute values to apply to the <child3> element.
params = {
    'name': '小明',
    'age': '12',
}
xfile = "./example.xml"
xml1 = xparser.parse(xfile)
FindNodeAndSetAttr(xml1, 'child3', params)
# Write the result as UTF-8 with an explicit XML declaration.
xml1.write('aa.xml', encoding='UTF-8', xml_declaration=True)
dom解析
#!/usr/bin/python
# -*- coding: utf-8 -*-
#python file
#Filename:xdom.py
# 使用dom可以保留注释
# 但是编码指令会丢失,需要手动写回去
import xml.dom.minidom as xparser
import os
import shutil
# Find a node and set some of its attributes
def FindNodeAndSetAttr(xml, nodeName, attrMap):
    """Overwrite existing attributes on the first element named ``nodeName``.

    Every attribute in ``attrMap`` must already exist on the element; all
    of them are checked before any is changed. Each ``key--value`` pair is
    printed as it is applied.

    Args:
        xml: DOM node exposing ``getElementsByTagName()`` (for example the
            document returned by ``xml.dom.minidom.parse``).
        nodeName: Tag name of the element to update.
        attrMap: Mapping of attribute name to the new value.

    Raises:
        IndexError: If no element with that exact tag name is found.
        KeyError: If the element lacks one of the attributes in ``attrMap``.
    """
    eles = xml.getElementsByTagName(nodeName)
    # Explicit checks instead of assert: asserts are stripped under -O, which
    # would silently turn this validation off.
    if not eles or eles[0].nodeName != nodeName:
        raise IndexError("no element named %r was found" % (nodeName,))
    ele = eles[0]
    missing = [k for k in attrMap if not ele.hasAttribute(k)]
    if missing:
        raise KeyError(
            "element %r has no attribute(s) %r" % (nodeName, missing))
    for k, v in attrMap.items():
        print("%s--%s" % (k, v))
        ele.setAttribute(k, v)
# Write the XML document to a file encoded as UTF-8
def WriteAsUTF8File(xmlparser, fileName):
    """Serialize a DOM document to ``fileName`` as UTF-8 with a BOM.

    ``toxml()`` is called without an encoding, so the declaration it emits
    has no ``encoding`` attribute; this function rewrites that declaration
    to name UTF-8 and follows it with a newline.

    Args:
        xmlparser: Object whose ``toxml()`` returns the document text
            (for example the document from ``xml.dom.minidom.parse``).
        fileName: Path of the file to create or overwrite.

    Raises:
        ValueError: If the serialized text does not start with the bare
            ``<?xml version="1.0" ?>`` declaration.
    """
    bare_decl = '<?xml version="1.0" ?>'
    xmlstr = xmlparser.toxml()
    # Explicit check instead of assert so it still runs under -O.
    if not xmlstr.startswith(bare_decl):
        raise ValueError(
            "serialized XML does not start with %r" % (bare_decl,))
    # count=1: rewrite only the leading declaration, never identical text
    # that appears later in the document (e.g. inside a comment).
    xmlstr = xmlstr.replace(
        bare_decl, '<?xml version="1.0" encoding="UTF-8" ?>\n', 1)
    # Encode before opening so an encoding failure cannot truncate the file.
    utf8bytes = b'\xEF\xBB\xBF' + xmlstr.encode('utf-8')
    # The with-block closes the handle even if the write raises.
    with open(fileName, 'wb') as fout:
        fout.write(utf8bytes)
# Remove a directory tree, with error handling
def myrmtree(dirtree):
    """Delete the directory tree at ``dirtree``, printing errors instead of raising.

    Args:
        dirtree: Path of the directory to remove recursively.

    Returns:
        None. An ``OSError`` from ``shutil.rmtree`` is printed and swallowed.
    """
    try:
        shutil.rmtree(dirtree)
    # OSError instead of WindowsError: the latter name exists only on Windows,
    # so elsewhere a failed removal surfaced as NameError. OSError also
    # covers every error WindowsError did.
    except OSError as e:
        print(e)
# Show the working directory, since the relative paths below resolve against it.
print("current dir is %s" % (os.getcwd()))
# Attribute values to apply to the <child3> element.
params = {'name': '小明',
          'age': '12'}
cfg = "./example.xml"
cfg_bak = cfg + "_bak"
print("=====%s=======" % (cfg))
# An existing backup is taken to mean this file was handled by an earlier
# run, so stop rather than overwrite that backup.
if os.path.exists(cfg_bak):
    print("%s has been processed" % (cfg))
    # Raise SystemExit directly: the bare exit() helper is installed by the
    # site module and is not guaranteed to exist (e.g. under python -S).
    raise SystemExit(-1)
# Keep the original as a backup and work from the backup copy.
shutil.move(cfg, cfg_bak)
xml1 = xparser.parse(cfg_bak)
FindNodeAndSetAttr(xml1, 'child3', params)
WriteAsUTF8File(xml1, 'bb.xml')
输出(前五行为 xtree.py 的运行结果,其余为 xdom.py 的运行结果)
current dir is I:\新建文件夹 (2)
12
小明
age--12
name--小明
current dir is I:\新建文件夹 (2)
=====./example.xml=======
age--12
name--小明