快乐Python快速上手系列2 - XML文件读写

前言
XML处理是日常工作中经常遇到的任务(例如读写配置文件),下面介绍使用Python快速处理XML的方法。

Python处理XML既可以使用标准库自带的模块,也可以使用第三方库。如果只是进行简单的读写,推荐使用ElementTree和minidom:前者是Python特有的XML处理方式,后者是DOM的一个简化实现。

其他的python中xml处理的库还包括,
xml.dom, DOM API;
xml.dom.pulldom, 支持建立部分dom tree;
xml.sax, 基于事件的解析方式,需要自己实现处理各类事件的回调函数(ContentHandler);
xml.parsers.expat, 底层Expat解析器的接口,速度快,但不做文档合法性校验(non-validating);处理不可信的XML数据时还需要注意安全问题。

1. ElementTree

1.1 ElementTree 写Xml文件
from xml.etree import ElementTree

def xml_writer():
    """Build a small country table and save it as ``samplexml.xml``.

    The document has a ``Data`` root holding one ``country`` element per
    entry. Each country carries a ``name`` attribute plus ``rank``, ``year``
    and ``gdppc`` children whose values are stored as element text.
    """
    # (name, rank, year, gdppc) for every country, in document order.
    countries = (
        ("Liechtenstein", "1", "2011", "141100"),
        ("Singapore", "4", "2011", "59900"),
        ("Panama", "68", "2011", "13600"),
    )

    data = ElementTree.Element("Data")
    for name, rank, year, gdppc in countries:
        country = ElementTree.SubElement(data, "country")
        country.set("name", name)
        # Children are serialized in creation order: rank, year, gdppc.
        for tag, value in (("rank", rank), ("year", year), ("gdppc", gdppc)):
            ElementTree.SubElement(country, tag).text = value

    ElementTree.ElementTree(data).write("samplexml.xml")

# Create samplexml.xml so that the reading examples have a file to parse.
xml_writer()
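生成结果(samplexml.xml的内容;ElementTree默认不做缩进,整个文档写在同一行):
<Data><country name="Liechtenstein"><rank>1</rank><year>2011</year><gdppc>141100</gdppc></country><country name="Singapore"><rank>4</rank><year>2011</year><gdppc>59900</gdppc></country><country name="Panama"><rank>68</rank><year>2011</year><gdppc>13600</gdppc></country></Data>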

1.2 ElementTree顺序依次读取XML文件
def xml_reader():
    """Print the element tree stored in ``samplexml.xml``.

    The root tag is printed first, then every child tag indented by four
    spaces, then each grandchild as ``tag:text`` indented by eight spaces,
    which is the nesting shown in the article's sample output.
    """
    tree = ElementTree.parse("samplexml.xml")
    root = tree.getroot()

    print(root.tag)
    for child in root:
        print("    " + child.tag)
        for grand_child in child:
            # text is None for an element without content; print an empty
            # value instead of failing on str + None.
            print("        " + grand_child.tag + ":" + (grand_child.text or ""))



# Print the tag hierarchy and values stored in samplexml.xml.
xml_reader()
结果输出
Data

    country

        rank:1

        year:2011

        gdppc:141100

    country

        rank:4

        year:2011

        gdppc:59900

    country

        rank:68

        year:2011

        gdppc:13600

1.3 ElementTree查找读XML文件
def xml_finder():
    """Print the text of every ``gdppc`` element found in ``samplexml.xml``."""
    tree = ElementTree.parse("samplexml.xml")
    root = tree.getroot()

    # iter() walks the whole subtree, so matches at any depth are reported.
    for item in root.iter("gdppc"):
        # print() with a single argument behaves the same on Python 2 and 3.
        print(item.text)



# List every gdppc value stored in samplexml.xml.
xml_finder()
输出结果
141100

59900

13600

1.4 ElementTree修改XML文件
def xml_modifier():
    """Add 99 to every ``gdppc`` value in ``samplexml.xml`` and save the file.

    The file is parsed, each ``gdppc`` text is read as an integer, increased
    by 99 and stored back as text, and the tree is written over the original
    file.
    """
    document = ElementTree.parse("samplexml.xml")

    for gdppc_node in document.getroot().iter("gdppc"):
        bumped = int(gdppc_node.text) + 99
        gdppc_node.text = str(bumped)

    # Overwrite the source file with the updated tree.
    document.write("samplexml.xml")



# Add 99 to every gdppc value in the file, then list the updated values.
xml_modifier()

xml_finder()
输出结果
141199

59999

13699

1.5 ElementTree删除某个节点
def xml_deleter():
    """Remove every country whose rank is above 50 from ``samplexml.xml``.

    Only ``country`` elements that are direct children of the root are
    examined; the file is rewritten in place afterwards.
    """
    document = ElementTree.parse("samplexml.xml")
    data = document.getroot()

    # Collect the matches first, then detach them from the root.
    to_drop = [
        entry
        for entry in data.findall("country")
        if int(entry.find('rank').text) > 50
    ]
    for entry in to_drop:
        data.remove(entry)

    document.write("samplexml.xml")

# Remove the countries with rank greater than 50, then dump the file to show
# the result. print() with one argument works on both Python 2 and Python 3.
print("calling xml_deleter...")
xml_deleter()
print("after delete, the file content will be:")
xml_reader()
输出结果
Data
    country
        rank:1
        year:2011
        gdppc:141100
    country
        rank:4
        year:2011
        gdppc:59900
    country
        rank:68
        year:2011
        gdppc:13600

calling xml_deleter...
after delete, the file content will be:

Data
    country
        rank:1
        year:2011
        gdppc:141100
    country
        rank:4
        year:2011
        gdppc:59900

1.6 ElementTree从字符串load数据
# Sample document kept in memory: fromstring() parses XML text directly and
# returns the root Element, so no file is involved.
xmlstr = '''<?xml version="1.0"?>
<data>
    <country name="Liechtenstein">
        <rank updated="yes">2</rank>
        <year>2008</year>
        <gdppc>141100</gdppc>
        <neighbor name="Austria" direction="E"/>
        <neighbor name="Switzerland" direction="W"/>
    </country>
    <country name="Singapore">
        <rank updated="yes">5</rank>
        <year>2011</year>
        <gdppc>59900</gdppc>
        <neighbor name="Malaysia" direction="N"/>
    </country>
</data>
'''
root = ElementTree.fromstring(xmlstr)
# Iterating an Element yields its direct children only.
for child in root:
    print(child.tag)

1.7 ElementTree通过XPath读取XML
需要具体了解XPATH的规则,几个简单例子:
def xml_xpath():
    """Demonstrate ElementTree's XPath subset on an in-memory document.

    The sample XML is printed first. Then, for each example path, the path
    itself is printed followed by the tag of every element that
    ``root.findall(path)`` returns.
    """
    xmlstr = '''<?xml version="1.0"?>
    <data>
        <country name="Liechtenstein">
            <rank updated="yes">2</rank>
            <year>2008</year>
            <gdppc>141100</gdppc>
            <neighbor name="Austria" direction="E"/>
            <neighbor name="Switzerland" direction="W"/>
        </country>
        <country name="Singapore">
            <rank updated="yes">5</rank>
            <year>2011</year>
            <gdppc>59900</gdppc>
            <neighbor name="Malaysia" direction="N"/>
        </country>
    </data>
    '''
    # print() with a single argument works on both Python 2 and Python 3.
    print("xmlstr is :")
    print(xmlstr)
    root = ElementTree.fromstring(xmlstr)

    xpath_queries = (
        # The context element itself (the root, here)
        ".",
        # All 'neighbor' grand-children of 'country' children of the
        # top-level element
        "./country/neighbor",
        # Nodes with name='Singapore' that have a 'year' child
        ".//year/..[@name='Singapore']",
        # 'year' nodes that are children of nodes with name='Singapore'
        ".//*[@name='Singapore']/year",
        # 'neighbor' nodes that are the second 'neighbor' child of their
        # parent
        ".//neighbor[2]",
    )
    for path in xpath_queries:
        print("finding " + path)
        for item in root.findall(path):
            print(item.tag)

# Run the XPath examples; the document and every match are printed.
xml_xpath()
结果输出
xmlstr is :

<?xml version="1.0"?>

<data>

    <country name="Liechtenstein">

        <rank updated="yes">2</rank>

        <year>2008</year>

        <gdppc>141100</gdppc>

        <neighbor name="Austria" direction="E"/>

        <neighbor name="Switzerland" direction="W"/>

    </country>

    <country name="Singapore">

        <rank updated="yes">5</rank>

        <year>2011</year>

        <gdppc>59900</gdppc>

        <neighbor name="Malaysia" direction="N"/>

    </country>

</data>



finding .

data

finding ./country/neighbor

neighbor

neighbor

neighbor

finding .//year/..[@name='Singapore']

country

finding .//*[@name='Singapore']/year

year

finding .//neighbor[2]

neighbor


2. Minidom
Why Minidom instead of DOM?
xml.dom.minidom is a minimal implementation of the Document Object
Model interface, with an API similar to that in other languages. It is intended
to be simpler than the full DOM and also significantly smaller. Users who are
not already proficient with the DOM should consider using the
xml.etree.ElementTree module for their XML processing instead.
主要意思是:minidom是DOM接口的一个精简实现,比完整的DOM更简单、体积也更小;使用者需要熟悉DOM规范,不熟悉DOM的读者建议改用xml.etree.ElementTree。

2.1 minidom写文件
def _append_country(dom, parent, name, rank, year):
    """Append a ``country`` element with ``rank``/``year`` children to *parent*.

    *name* becomes the ``name`` attribute of the country; *rank* and *year*
    are stored as text. The ``rank`` element is marked ``updated="yes"``.
    """
    country = dom.createElement('country')
    country.setAttribute('name', name)
    parent.appendChild(country)

    rank_node = dom.createElement('rank')
    rank_node.appendChild(dom.createTextNode(rank))
    rank_node.setAttribute('updated', 'yes')
    country.appendChild(rank_node)

    year_node = dom.createElement('year')
    year_node.appendChild(dom.createTextNode(year))
    country.appendChild(year_node)


def xml_minidom_writer():
    """Write a two-country sample document to ``sample.xml`` with minidom.

    The document root is ``countrylist``; it receives one ``country`` element
    for Liechtenstein and one for Singapore, each with ``rank`` and ``year``
    children. The result is pretty-printed with four-space indentation.
    """
    # Imported locally so the function does not depend on a module-level
    # import of xml.dom.minidom being present.
    import xml.dom.minidom

    impl = xml.dom.minidom.getDOMImplementation()
    dom = impl.createDocument(None, "countrylist", None)
    root = dom.documentElement

    _append_country(dom, root, 'Liechtenstein', '2', '2008')
    _append_country(dom, root, 'Singapore', '5', '2011')

    # The context manager closes the file even if writexml() raises.
    with open('sample.xml', 'w') as f:
        # Four-space addindent reproduces the layout of the sample output.
        dom.writexml(f, indent="", addindent='    ', newl='\n')

# Generate sample.xml in the current working directory.
xml_minidom_writer()
运行结果
<?xml version="1.0" ?>

<countrylist>

    <country name="Liechtenstein">

        <rank updated="yes">2</rank>

        <year>2008</year>

    </country>

    <country name="Singapore">

        <rank updated="yes">5</rank>

        <year>2011</year>

    </country>

</countrylist>

2.2 minidom读文件
def xml_minidom_reader():
    """Parse ``sample.xml`` with minidom and print its countries and ranks.

    Every ``country`` element is first printed as XML text. Then, for each
    country, its node name is printed followed by ``rank:<value>`` taken from
    the first ``rank`` element inside it.

    Raises:
        IndexError: if a country has no ``rank`` element, or the ``rank``
            element has no child node.
    """
    # Imported locally so the function does not depend on a module-level
    # import of xml.dom.minidom being present.
    import xml.dom.minidom

    doc = xml.dom.minidom.parse("sample.xml")
    root = doc.documentElement
    country_list = root.getElementsByTagName('country')

    print('list country in xml file:')
    # print('\n') emits two line breaks: the character plus print's own one.
    print('\n')
    for country in country_list:
        print(country.toxml())
    print('\n')

    print('list rank in each country')
    print('\n')
    for country in country_list:
        print(country.nodeName)
        rank_node = country.getElementsByTagName('rank')[0]
        # The rank value is read from the element's first child node.
        print(rank_node.nodeName + ':' + rank_node.childNodes[0].nodeValue)



# Print the countries and ranks stored in sample.xml.
xml_minidom_reader()
输出结果
list country in xml file:

<country name="Liechtenstein">
        <rank updated="yes">2</rank>
        <year>2008</year>
    </country>
<country name="Singapore">
        <rank updated="yes">5</rank>
        <year>2011</year>
    </country>


list rank in each country

country
rank:2
country
rank:5

Wish you a happy time with Python :)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值