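# SAX parsing (event-driven: the whole document is never loaded into memory;
# only the data your own event handlers choose to keep is stored).
# To parse with SAX, subclass ContentHandler and override its methods.
# Commonly used ContentHandler methods:
# startDocument(self): called when the document starts
# endDocument(self): called when the parser reaches the end of the document
# startElement(self, name, attrs): called on an XML start tag (name: element name, attrs: element attributes)
# endElement(self, name): called on an XML end tag (name: element name)
# characters(self, content): called when element text is read (content: the text; one element's text may arrive in several calls)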
from xml.sax import parse
from xml.sax.handler import ContentHandler
class Student():
    """Plain record holding the id, name, age and sex of one student.

    Every field defaults to None, so ``Student()`` yields an empty record
    whose attributes can be assigned afterwards.
    """

    def __init__(self, id=None, name=None, age=None, sex=None):
        # Store the four fields exactly as given; no conversion is applied.
        self.id, self.name, self.age, self.sex = id, name, age, sex

    def __repr__(self):
        """Return the fields as one tab-separated row (two tabs after name)."""
        row = (self.id, self.name, self.age, self.sex)
        return '{0}\t{1}\t\t{2}\t{3}'.format(*row)
# Module-level result list: SaxParser appends one Student per </student> tag.
stuList = []


class SaxParser(ContentHandler):
    """SAX handler that turns <student> elements into Student objects.

    A <student id="..."> start tag creates a new Student; the text of its
    <stuname>, <stuage> and <stusex> children is stored on that Student as
    ``name``, ``age`` and ``sex``; the </student> end tag appends the
    finished Student to the module-level ``stuList``.
    """

    # Child element name -> Student attribute that receives its text.
    _FIELD_BY_TAG = {'stuname': 'name', 'stuage': 'age', 'stusex': 'sex'}

    def __init__(self):
        super().__init__()
        self.name = None   # name of the element currently open, if any
        self.stu = None    # Student being built, None before the first one
        self._text = ''    # text collected so far for the current element

    def startElement(self, name, attrs):
        """Remember the open element and start a Student on <student>.

        Raises KeyError if a <student> tag has no ``id`` attribute.
        """
        self.name = name
        self._text = ''
        if name == 'student':
            self.stu = Student()
            self.stu.id = attrs['id']

    def characters(self, content):
        """Store element text on the current Student.

        A SAX parser may deliver the text of a single element in several
        calls, so the chunks are accumulated instead of letting each call
        overwrite the previous one.
        """
        field = self._FIELD_BY_TAG.get(self.name)
        if field is None or self.stu is None:
            # Not a field element, or a field outside any <student>.
            return
        self._text += content
        setattr(self.stu, field, self._text)

    def endElement(self, name):
        """Publish the Student on </student> and forget the open element."""
        if name == 'student':
            stuList.append(self.stu)
        # Cleared on every end tag so that whitespace between elements is
        # not mistaken for the text of the element that just closed.
        self.name = None
# Run the SAX parser over the XML file; the handler fills stuList as a
# side effect while the document is being read.
handler = SaxParser()
parse('7月2号xml.xml', handler)

# Print every collected student, one per line, using Student.__repr__.
for student in stuList:
    print(student)