快乐Python快速上手系列2 - XML文件读写

最新推荐文章于 2024-08-13 22:44:30 发布

angela_NH

最新推荐文章于 2024-08-13 22:44:30 发布

阅读量466

点赞数

文章标签： python xml

本文链接：https://blog.csdn.net/angela_NH/article/details/78912390

版权

 
 前言 

  XML处理是日常工作中经常遇到的任务（例如读写配置文件），下面介绍使用Python快速处理XML的方法。 

  Python处理XML既可以使用自带的标准库，也可以使用第三方库。如果只是进行简单的读写，推荐使用ElementTree和minidom：前者是Python特有的XML处理方式，后者是DOM的简单实现。 

  其他的python中xml处理的库还包括， 

  xml.dom, DOM API; 

  xml.dom.pulldom, 支持建立部分dom tree; 

  xml.sax, 需要自己提供接口函数功能; 

  xml.parsers.expat， 快速xml处理，但是不安全。 

 
 1. ElementTree 

  1.1 ElementTree 写Xml文件 

 
      from xml.etree import ElementTree 
     
      def xml_writer():
          """Create a sample country document and save it as ``samplexml.xml``.

          The root element is ``Data``. Each entry becomes a ``country`` child
          with a ``name`` attribute and ``rank``, ``year`` and ``gdppc``
          sub-elements whose text holds the corresponding value.
          """
          # One row per country: (name, rank, year, gdppc), in document order.
          countries = [
              ("Liechtenstein", "1", "2011", "141100"),
              ("Singapore", "4", "2011", "59900"),
              ("Panama", "68", "2011", "13600"),
          ]

          root = ElementTree.Element("Data")
          for name, rank, year, gdppc in countries:
              country = ElementTree.SubElement(root, "country")
              country.set("name", name)
              # Children are always created in the order rank, year, gdppc.
              for tag, value in (("rank", rank), ("year", year), ("gdppc", gdppc)):
                  ElementTree.SubElement(country, tag).text = value

          ElementTree.ElementTree(root).write("samplexml.xml")

      xml_writer()

  生成结果 

  1.2 ElementTree按顺序读取XML文件 

 
      def xml_reader():
          """Print the element hierarchy stored in ``samplexml.xml``.

          Prints the root tag, then each child tag, then one ``tag:text`` line
          per grandchild. A grandchild without text (for example ``<year/>``)
          is printed with an empty value instead of raising ``TypeError``.
          """
          tree = ElementTree.parse("samplexml.xml")
          root = tree.getroot()
          print(root.tag)
          for child in root:
              print(" " + child.tag)
              for gran_child in child:
                  # Element.text is None for an element with no text, and None
                  # cannot be concatenated to a str, so fall back to "".
                  print(" " + gran_child.tag + ":" + (gran_child.text or ""))
     
      # Run the reader demo: prints the tags and text stored in samplexml.xml.
      xml_reader()

  结果输出 

 
      Data 
     
          country 
     
              rank:1 
     
              year:2011 
     
              gdppc:141100 
     
          country 
     
              rank:4 
     
              year:2011 
     
              gdppc:59900 
     
          country 
     
              rank:68 
     
              year:2011 
     
              gdppc:13600

  1.3 ElementTree查找读XML文件 

 
      def xml_finder():
          """Print the text of every ``gdppc`` element found in ``samplexml.xml``."""
          tree = ElementTree.parse("samplexml.xml")
          root = tree.getroot()
          # iter() walks the whole subtree, so matches at any depth are visited.
          for item in root.iter("gdppc"):
              # print() as a function call works on both Python 2 and Python 3.
              print(item.text)
     
      # Run the finder demo: prints the text of each gdppc element in samplexml.xml.
      xml_finder()

  输出结果 

  1.4 ElementTree修改XML文件 

 
      def xml_modifier():
          """Add 99 to every ``gdppc`` value in ``samplexml.xml`` and save it in place."""
          document = ElementTree.parse("samplexml.xml")
          for gdppc_node in document.getroot().iter("gdppc"):
              # The value is stored as text: convert, bump, and store as text again.
              bumped = int(gdppc_node.text) + 99
              gdppc_node.text = str(bumped)
          document.write("samplexml.xml")
     
      # Apply the +99 update, then print the gdppc values to show the change.
      xml_modifier() 
     
      xml_finder()

  输出结果 

  1.5 ElementTree删除某个节点 

 
      def xml_deleter():
          """Remove every ``country`` whose ``rank`` exceeds 50 from ``samplexml.xml``."""
          document = ElementTree.parse("samplexml.xml")
          top = document.getroot()
          # Collect the matches first, then detach them from the root.
          to_drop = [
              node
              for node in top.findall("country")
              if int(node.find('rank').text) > 50
          ]
          for node in to_drop:
              top.remove(node)
          document.write("samplexml.xml")
     
      # Demo: delete the low-ranked countries, then show what is left in the file.
      # print() is written as a function call so it runs on Python 2 and Python 3.
      print("calling xml_deleter...")
      xml_deleter()
      print("after delete, the file content will be:")
      xml_reader()

  输出结果 

 
      Data 
     
          country 
     
              rank:1 
     
              year:2011 
     
              gdppc:141100 
     
          country 
     
              rank:4 
     
              year:2011 
     
              gdppc:59900 
     
          country 
     
              rank:68 
     
              year:2011 
     
              gdppc:13600 
     
      calling xml_deleter... 
     
      after delete, the file content will be: 
     
      Data 
     
          country 
     
              rank:1 
     
              year:2011 
     
              gdppc:141100 
     
          country 
     
              rank:4 
     
              year:2011 
     
              gdppc:59900

  1.6 ElementTree从字符串load数据 

 
      # Sample document held in memory as a string instead of a file.
      xmlstr = '''<?xml version="1.0"?> 
     
      <data> 
     
          <country name="Liechtenstein"> 
     
              <rank updated="yes">2</rank> 
     
              <year>2008</year> 
     
              <gdppc>141100</gdppc> 
     
              <neighbor name="Austria" direction="E"/> 
     
              <neighbor name="Switzerland" direction="W"/> 
     
          </country> 
     
          <country name="Singapore"> 
     
              <rank updated="yes">5</rank> 
     
              <year>2011</year> 
     
              <gdppc>59900</gdppc> 
     
              <neighbor name="Malaysia" direction="N"/> 
     
          </country> 
     
      </data> 
     
      ''' 

      # fromstring() parses the text and returns the root element directly.
      root = ElementTree.fromstring(xmlstr)

      # Print the tag of each direct child of the root.
      # print() is written as a function call so it runs on Python 2 and Python 3.
      for child in root:
          print(child.tag)

  1.7 ElementTree通过XPath读取XML 

  需要具体了解XPATH的规则，几个简单例子： 

 
      def xml_xpath():
          """Demonstrate ElementTree's XPath-style queries on an in-memory document.

          Prints the XML text, then for each query prints ``finding <query>``
          followed by the tag of every element that ``findall`` returns.
          """
          xmlstr = '''<?xml version="1.0"?> 
     
          <data> 
     
              <country name="Liechtenstein"> 
     
                  <rank updated="yes">2</rank> 
     
                  <year>2008</year> 
     
                  <gdppc>141100</gdppc> 
     
                  <neighbor name="Austria" direction="E"/> 
     
                  <neighbor name="Switzerland" direction="W"/> 
     
              </country> 
     
              <country name="Singapore"> 
     
                  <rank updated="yes">5</rank> 
     
                  <year>2011</year> 
     
                  <gdppc>59900</gdppc> 
     
                  <neighbor name="Malaysia" direction="N"/> 
     
              </country> 
     
          </data> 
     
          ''' 
          # print() is written as a function call so it runs on Python 2 and Python 3.
          print("xmlstr is :")
          print(xmlstr)
          root = ElementTree.fromstring(xmlstr)

          # The queries run in this order; each one is reported the same way.
          queries = (
              # Top-level elements (here: the root node itself)
              ".",
              # All 'neighbor' grand-children of 'country' children of the
              # top-level elements
              "./country/neighbor",
              # Nodes with name='Singapore' that have a 'year' child
              ".//year/..[@name='Singapore']",
              # 'year' nodes that are children of nodes with name='Singapore'
              ".//*[@name='Singapore']/year",
              # All 'neighbor' nodes that are the second child of their parent
              ".//neighbor[2]",
          )
          for query in queries:
              print("finding " + query)
              for item in root.findall(query):
                  print(item.tag)

      xml_xpath()

  结果输出 

 
      xmlstr is : 
     
      <?xml version="1.0"?> 
     
      <data> 
     
          <country name="Liechtenstein"> 
     
              <rank updated="yes">2</rank> 
     
              <year>2008</year> 
     
              <gdppc>141100</gdppc> 
     
              <neighbor name="Austria" direction="E"/> 
     
              <neighbor name="Switzerland" direction="W"/> 
     
          </country> 
     
          <country name="Singapore"> 
     
              <rank updated="yes">5</rank> 
     
              <year>2011</year> 
     
              <gdppc>59900</gdppc> 
     
              <neighbor name="Malaysia" direction="N"/> 
     
          </country> 
     
      </data> 
     
      finding . 
     
      data 
     
      finding ./country/neighbor 
     
      neighbor 
     
      neighbor 
     
      neighbor 
     
      finding .//year/..[@name='Singapore'] 
     
      country 
     
      finding .//*[@name='Singapore']/year 
     
      year 
     
      finding .//neighbor[2] 
     
      neighbor

  2. Minidom 

  Why Minidom instead of DOM? 

 
      xml.dom.minidom is a minimal implementation of the Document Object 
     
      Model interface, with an API similar to that in other languages. It is intended 
     
      to be simpler than the full DOM and also significantly smaller. Users who are 
     
      not already proficient with the DOM should consider using the 
     
      xml.etree.ElementTree module for their XML processing instead. 
     
      主要意思是Minidom是一个DOM的简单实现，使用者需要熟悉DOM规范。

  2.1 minidom写文件 

 
      def xml_minidom_writer():
          """Build a two-country document with minidom and write it to ``sample.xml``.

          The document root is ``countrylist``. Each ``country`` element has a
          ``name`` attribute, a ``rank`` child carrying ``updated="yes"`` and a
          ``year`` child; both children hold their value as a text node.
          """
          # Imported locally because no xml.dom.minidom import is visible in the
          # surrounding snippets; without it the lookups below raise NameError.
          import xml.dom.minidom

          # One row per country: (name, rank, year), in document order.
          countries = [
              ('Liechtenstein', '2', '2008'),
              ('Singapore', '5', '2011'),
          ]

          impl = xml.dom.minidom.getDOMImplementation()
          dom = impl.createDocument(None, "countrylist", None)
          root = dom.documentElement

          for name, rank_text, year_text in countries:
              country = dom.createElement('country')
              country.setAttribute('name', name)
              root.appendChild(country)

              rank = dom.createElement('rank')
              rank.appendChild(dom.createTextNode(rank_text))
              rank.setAttribute('updated', 'yes')
              country.appendChild(rank)

              year = dom.createElement('year')
              year.appendChild(dom.createTextNode(year_text))
              country.appendChild(year)

          # The context manager closes the file even if writexml() raises.
          with open('sample.xml', 'w') as f:
              dom.writexml(f, indent="", addindent=' ', newl='\n')

      xml_minidom_writer()

  运行结果 

 
      <?xml version="1.0" ?> 
     
      <countrylist> 
     
          <country name="Liechtenstein"> 
     
              <rank updated="yes">2</rank> 
     
              <year>2008</year> 
     
          </country> 
     
          <country name="Singapore"> 
     
              <rank updated="yes">5</rank> 
     
              <year>2011</year> 
     
          </country> 
     
      </countrylist>

  2.2 minidom读文件 

 
      def xml_minidom_reader():
          """Print each ``country`` element of ``sample.xml``, then each country's rank."""
          # Imported locally because no xml.dom.minidom import is visible in the
          # surrounding snippets; without it the parse() lookup raises NameError.
          import xml.dom.minidom

          doc = xml.dom.minidom.parse("sample.xml")
          root = doc.documentElement
          country_list = root.getElementsByTagName('country')

          print('list country in xml file:')
          print('\n')
          for country in country_list:
              print(country.toxml())
          print('\n')

          print('list rank in each country')
          print('\n')
          for country in country_list:
              print(country.nodeName)
              # Take the first <rank> element; its first child node holds the text.
              rank_node = country.getElementsByTagName('rank')[0]
              print(rank_node.nodeName + ':' + rank_node.childNodes[0].nodeValue)
     
      # Run the minidom reader demo: prints the countries and ranks found in sample.xml.
      xml_minidom_reader()

  输出结果 

 
      list country in xml file: 
     
      <country name="Liechtenstein"> 
     
              <rank updated="yes">2</rank> 
     
              <year>2008</year> 
     
          </country> 
     
      <country name="Singapore"> 
     
              <rank updated="yes">5</rank> 
     
              <year>2011</year> 
     
          </country> 
     
      list rank in each country 
     
      country 
     
      rank:2 
     
      country 
     
      rank:5