Python有3种方法解析XML:SAX,DOM以及ElementTree
DOM解析:
from xml.dom.minidom import parse


def _element_text(element):
    """Return the character data found directly inside *element*.

    All text and CDATA children are concatenated, so content that the
    parser split into several nodes (for example around a CDATA section)
    is returned whole. Comments and nested elements are skipped. An empty
    element yields an empty string.
    """
    wanted = (element.TEXT_NODE, element.CDATA_SECTION_NODE)
    return "".join(
        child.data for child in element.childNodes if child.nodeType in wanted
    )


def _first_text(parent, tag):
    """Return the text of the first *tag* element under *parent*, or ''."""
    matches = parent.getElementsByTagName(tag)
    return _element_text(matches[0]) if matches else ""


def iter_book_lines(doc):
    """Yield the report lines for every <book> element in *doc*.

    Args:
        doc: a minidom ``Document`` (as returned by ``parse``).

    Yields:
        The lines the script prints, in order: a separator, the optional
        ID line, then BOOKNAME / AUTHOR / PRICE for each book.
    """
    root = doc.documentElement  # root element of the document
    # getElementsByTagName returns every matching descendant as a list
    for book in root.getElementsByTagName('book'):
        yield "===book===="
        if book.hasAttribute('id'):  # only report the id when it is present
            yield 'ID:%s' % book.getAttribute('id')
        yield "BOOKNAME:%s" % _first_text(book, "bookname")
        yield "AUTHOR:%s" % _first_text(book, "author")
        yield "PRICE:%s" % _first_text(book, "price")


if __name__ == "__main__":
    # Load the XML file and print one report line at a time.
    for line in iter_book_lines(parse("book.xml")):
        print(line)
SAX解析:
from xml.sax import parse
from xml.sax import ContentHandler


class Student:
    """Simple record holding the fields of one <student> element."""

    def __init__(self, name=None, age=None, sex=None):
        self.name = name
        self.age = age
        self.sex = sex

    def __str__(self):
        # str() every field so a student with an unset (None) field can
        # still be printed instead of raising TypeError on concatenation.
        return ",".join(str(value) for value in (self.name, self.age, self.sex))


# Filled by saxdemo as each </student> end tag is reached.
students = []


class saxdemo(ContentHandler):
    """SAX handler that turns <student> elements into Student objects.

    Each completed student is appended to the module-level ``students``
    list. ``self.tag`` holds the full text of the most recently closed
    element.
    """

    # Maps a child element name to the Student attribute it fills.
    _FIELDS = {"stuname": "name", "stuage": "age", "stusex": "sex"}

    def __init__(self):
        super().__init__()
        self.student = None
        self.tag = None
        self._chunks = []  # text pieces of the element currently open

    def startDocument(self):
        pass

    def endDocument(self):
        pass

    def startElement(self, name, attrs):
        # Start a fresh buffer so whitespace between tags is not carried
        # into the next element's text.
        self._chunks = []
        if name == 'student':
            self.student = Student()

    def endElement(self, name):
        self.tag = "".join(self._chunks)
        self._chunks = []
        field = self._FIELDS.get(name)
        if field is not None:
            setattr(self.student, field, self.tag)
        elif name == 'student':
            students.append(self.student)

    def characters(self, content):
        # The parser may deliver one element's text in several calls, so
        # collect the pieces rather than keeping only the last one.
        self._chunks.append(content)


if __name__ == "__main__":
    parse("Student.xml", saxdemo())
    for stu in students:
        print(stu)
ElementTree解析XML:
import xml.etree.ElementTree as et


class Student:
    """Simple record holding the fields of one <student> element."""

    def __init__(self, name=None, age=None, sex=None):
        self.name = name
        self.age = age
        self.sex = sex

    def __str__(self):
        # str() every field so a student with an unset (None) field can
        # still be printed instead of raising TypeError on concatenation.
        return ",".join(str(value) for value in (self.name, self.age, self.sex))


def load_students(source):
    """Parse *source* and return one Student per <student> child of the root.

    Args:
        source: a filename or file object accepted by ``et.parse``.

    Returns:
        A list of Student objects. A field whose child tag is missing is
        None; a field whose tag is present but empty is ''.
    """
    tree = et.parse(source)
    # findtext returns None for a missing child instead of raising the
    # AttributeError that ``find(...).text`` would.
    return [
        Student(
            name=node.findtext("stuname"),
            age=node.findtext("stuage"),
            sex=node.findtext("stusex"),
        )
        for node in tree.findall("student")
    ]


# Students loaded from the file when run as a script.
p = []

if __name__ == "__main__":
    p.extend(load_students("Student.xml"))
    for i in p:
        print(i)