XML 的解析方式主要有 SAX 和 DOM 两种。SAX 是事件驱动的流式解析,不需要把整个文档载入内存,因此在只需读取 XML 文档时有先天优势。下面介绍如何使用 SAX 解析 XML:
import xml.sax
from xml.sax import make_parser
from xml.sax.handler import ContentHandler
class ParseHandler(ContentHandler):
    """SAX content handler that traces every parsing event to stdout.

    The handler does not build any data structure: each callback simply
    prints what the parser reported (document start/end, element start/end
    and character data), which makes the event order of a SAX parse visible.
    """

    # Deletes spaces, newlines, tabs and carriage returns in a single pass.
    _DROP_WHITESPACE = str.maketrans("", "", " \n\t\r")

    def __init__(self):
        """Initialise the base handler and the (currently unused) fields."""
        # Let ContentHandler set up its own state (e.g. the document locator).
        super().__init__()
        # Placeholder fields; none of the callbacks below reads or writes them.
        self.CurrentData = ""
        self.type = ""
        self.format = ""
        self.year = ""
        # NOTE(review): probably meant "rating" (the XML tag is <rating>);
        # the name is kept so existing attribute access keeps working.
        self.ration = ""
        self.stars = ""
        self.description = ""

    def characters(self, content):
        """Print a chunk of character data with whitespace removed.

        Spaces, newlines, tabs and carriage returns are removed everywhere
        in the chunk (so "War, Thriller" prints as "War,Thriller"), and any
        other leading/trailing whitespace is stripped. A whitespace-only
        chunk therefore prints as an empty line.

        Args:
            content: The text chunk reported by the parser.
        """
        print(content.translate(self._DROP_WHITESPACE).strip())

    def startDocument(self):
        """Announce the start of the document."""
        print("解析开始")

    def endDocument(self):
        """Announce the end of the document."""
        print("解析结束")

    def startElement(self, name, attrs):
        """Print the opening tag name, plus the title of ``movie`` elements.

        Args:
            name: Tag name of the element being opened.
            attrs: Mapping-like object holding the element's attributes.
        """
        # Use .get() so a <movie> without a title attribute does not abort
        # the whole parse with KeyError; it is traced like any other tag.
        title = attrs.get("title") if name == 'movie' else None
        if title is not None:
            print("startElement", name, "attrs", title)
        else:
            print("startElement", name, "attrs")

    def endElement(self, name):
        """Print the name of the element being closed.

        Args:
            name: Tag name of the element being closed.
        """
        print("endElement", name)
if __name__ == '__main__':
    # Script entry point: stream movies.xml through the tracing handler.
    sax_reader = make_parser()
    # Switch namespace processing off so element names are reported as
    # plain strings to startElement/endElement.
    sax_reader.setFeature(xml.sax.handler.feature_namespaces, 0)
    sax_reader.setContentHandler(ParseHandler())
    # The path is relative to the current working directory.
    sax_reader.parse("movies.xml")
文件movies.xml内容如下:
<?xml version="1.0" encoding="UTF-8"?> <collection shelf="New Arrivals"> <movie title="Enemy Behind"> <type>War, Thriller</type> <format>DVD</format> <year>2003</year> <rating>PG</rating> <stars>10</stars> <description>Talk about a US-Japan war</description> </movie> <movie title="Transformers"> <type>Anime, Science Fiction</type> <format>DVD</format> <year>1989</year> <rating>R</rating> <stars>8</stars> <description>A scientific fiction</description> </movie> <movie title="Trigun"> <type>Anime, Action</type> <format>DVD</format> <episodes>4</episodes> <rating>PG</rating> <stars>10</stars> <description>Vash the Stampede!</description> </movie> <movie title="Ishtar"> <type>Comedy</type> <format>VHS</format> <rating>PG</rating> <stars>2</stars> <description>Viewable boredom</description> </movie> </collection>