XML 解析的一个例子，如下所示：
#!/usr/bin/env python
#-*-coding:utf-8-*-
import xml.sax.handler
class TestHander(xml.sax.handler.ContentHandler):
    """SAX content handler that collects selected RSS fields as labelled text.

    For every ``title``, ``link``, ``description`` and ``pubDate`` element the
    handler appends ``<label><element text>`` to ``content`` after passing it
    through the module-level ``deleteCharacter`` helper.

    Attributes:
        mapping: Dictionary created empty by the constructor; this class
            never writes to it.
        content: Output accumulated so far for all recognised elements.
        buffer: Character data received since the most recent start tag.
    """

    # Label prepended to the text of each exported element.
    _LABELS = {
        "title": u"标题:",
        "link": u"链接地址:",
        "description": u"内容:",
        "pubDate": u"发布时间:",
    }

    def __init__(self):
        """Create a handler with empty output and an empty text buffer."""
        # Explicit base call (rather than super()) so the class also works
        # where ContentHandler is an old-style class.
        xml.sax.handler.ContentHandler.__init__(self)
        self.mapping = {}
        self.content = ""
        # Defined up front so characters()/endElement() can never hit a
        # missing attribute, whatever order the callbacks arrive in.
        self.buffer = ""

    def startElement(self, name, attributes):
        """Start of an element: drop text collected for any earlier tag."""
        self.buffer = ""

    def characters(self, data):
        """Append one chunk of character data to the current buffer.

        A parser may deliver the text of a single element in several chunks,
        so chunks are joined unchanged here; the terminating newline is added
        once, in endElement(), instead of after every chunk.
        """
        self.buffer += data

    def endElement(self, name):
        """End of an element: export its text if it is a recognised field."""
        label = self._LABELS.get(name)
        if label is None:
            # Not one of the exported fields; nothing to record.
            return
        text = self.buffer
        if text:
            # One newline per field keeps each exported value on its own line.
            text += "\n"
        self.content += deleteCharacter(label + text)
def deleteCharacter(string):
    """Strip spaces and a few HTML fragments from *string*; return UTF-8 bytes.

    The substitutions are applied in this order:

    1. every space character is removed,
    2. the literal ``nbsp;`` is removed,
    3. ``<br/>`` becomes a newline,
    4. ``<p>`` and ``</p>`` are removed.

    ``<br />`` needs no rule of its own: step 1 has already turned it into
    ``<br/>`` by the time step 3 runs.

    Args:
        string: Text to clean (a unicode string).

    Returns:
        The cleaned text encoded as UTF-8 bytes.
    """
    # NOTE(review): only "nbsp;" is removed, so the "&" of an "&nbsp;" entity
    # stays in the output -- confirm whether that is intended.
    replacements = (
        (u" ", u""),
        (u"nbsp;", u""),
        (u"<br/>", u"\n"),
        (u"<p>", u""),
        (u"</p>", u""),
    )
    cleaned = string
    # Substitute on text and encode once at the end: calling replace() with
    # text arguments on already-encoded bytes raises TypeError on Python 3.
    for old, new in replacements:
        cleaned = cleaned.replace(old, new)
    return cleaned.encode("utf-8")
if __name__ == "__main__":
    # make_parser() lives in the xml.sax package itself.
    import xml.sax

    parser = xml.sax.make_parser()
    hander = TestHander()
    parser.setContentHandler(hander)
    # Download and parse the RSS feed; the handler accumulates the result
    # in hander.content.
    parser.parse("http://blog.sina.com.cn/rss/soundfragment.xml")

    # Write the collected text to a file. The with-block closes the file
    # even if write() raises, and open() replaces the file() builtin, which
    # no longer exists on Python 3.
    file_path = "c:\\wt.txt"
    with open(file_path, "w") as infile:
        infile.write(hander.content)