使用 Python 解析 XML 文件

xml文件内容:

<?xml version="1.0" ?>
<!--Simple xml document__chapter 8-->
<book>
	<title>
		sample xml thing
	</title>
	<author>
		<name>
			<first>
				ma
			</first>
			<last>
				xiaoju
			</last>
		</name>
		<affiliation>
			Springs Widgets, Inc.
		</affiliation>
	</author>
	<chapter number="1">
		<title>
			First
		</title>
		<para>
			I think widgets are great. You should buy lots of them from
			<company>
				Springy Widgets, Inc.
			</company>
		</para>
	</chapter>
</book>



python代码:

from xml.dom import minidom, Node
import re, textwrap

########################################################################
class SampleScanner:
    """Walk a parsed ``<book>`` document and print a summary of it.

    All work happens during construction: every top-level ``<book>``
    element of the document is scanned and its title, author, and
    chapter details are written to standard output.  The ``print``
    calls take a single pre-formatted string so the output is the same
    under Python 2 and Python 3.
    """

    def __init__(self, doc):
        """Scan *doc* and print what is found.

        doc -- an ``xml.dom.minidom.Document``.  Only top-level
               elements named ``book`` are processed.
        """
        assert isinstance(doc, minidom.Document), \
            "doc must be an xml.dom.minidom.Document"
        for child in doc.childNodes:
            if child.nodeType == Node.ELEMENT_NODE and child.tagName == "book":
                self.handle_book(child)

    def handle_book(self, node):
        """Dispatch the direct element children of a ``<book>`` node.

        ``<title>`` is printed here; ``<author>`` and ``<chapter>`` are
        passed to their own handlers.  Other children are ignored.
        """
        for child in node.childNodes:
            if child.nodeType != Node.ELEMENT_NODE:
                continue
            if child.tagName == "title":
                print("Book titile is: %s" % self.gettext(child.childNodes))
            elif child.tagName == "author":
                self.handle_author(child)
            elif child.tagName == "chapter":
                self.handle_chapter(child)

    def handle_chapter(self, node):
        """Print a chapter's ``number`` attribute and title, then its paras."""
        print("number: %s" % node.getAttribute("number"))
        # getElementsByTagName searches all descendants, not just direct
        # children, so every <title> below this chapter contributes text.
        title_nodes = node.getElementsByTagName("title")
        print("title: %s" % self.gettext(title_nodes))

        for child in node.childNodes:
            if child.nodeType != Node.ELEMENT_NODE:
                continue
            if child.tagName == "para":
                self.handle_chapter_para(child)

    def handle_chapter_para(self, node):
        """Print the text of the ``<company>`` elements inside a ``<para>``."""
        company = self.gettext(node.getElementsByTagName("company"))
        print("chapter:para:company %s" % company)

    def handle_author(self, node):
        """Handle the ``<name>`` and ``<affiliation>`` children of ``<author>``."""
        for child in node.childNodes:
            if child.nodeType != Node.ELEMENT_NODE:
                continue
            if child.tagName == "name":
                self.handle_author_name(child)
            elif child.tagName == "affiliation":
                print("affiliation: %s" % self.gettext(child.childNodes))

    def handle_author_name(self, node):
        """Print the ``<first>`` and ``<last>`` parts of an author ``<name>``.

        A missing part is printed as an empty string.
        """
        first = ""
        last = ""
        for child in node.childNodes:
            if child.nodeType != Node.ELEMENT_NODE:
                continue
            if child.tagName == "first":
                first = self.gettext(child.childNodes)
            elif child.tagName == "last":
                last = self.gettext(child.childNodes)

        print("firstname:%s,lastname:%s" % (first, last))

    def gettext(self, nodelist):
        """Return the text under *nodelist* with whitespace runs collapsed.

        nodelist -- an iterable of DOM nodes.  Text and CDATA nodes
                    contribute their own character data; nodes with
                    children are descended into recursively; anything
                    else (comments, empty elements) contributes nothing.

        Every run of whitespace becomes a single space.  Leading and
        trailing whitespace is collapsed but not stripped.
        """
        pieces = []
        for node in nodelist:
            if node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
                # Use the node's own data rather than wholeText: wholeText
                # already includes every adjacent text/CDATA sibling, so
                # collecting it per node would repeat the same text.
                pieces.append(node.data)
            elif node.hasChildNodes():
                pieces.append(self.gettext(node.childNodes))

        return re.sub(r"\s+", " ", "".join(pieces))
    
                
if __name__ == "__main__":
    # Script entry point: parse simple.xml from the current directory and
    # hand it to the scanner, which prints its report while being constructed.
    sample = SampleScanner(minidom.parse("simple.xml"))
    


  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值