python解析xml

这里主要介绍一小段利用 Python 解析 XML 和生成 XML 的示例代码

test.xml文件

<?xml version="1.0" encoding="ISO-8859-1"?>
<book>
    <title>Python skills</title>
    <author>
        <name>
            <firstname>Jack</firstname>
            <lastname>Li</lastname>
        </name>
        <affiliation>Smith, Tone</affiliation>
    </author>

    <chapter number="1">
        <title>Simple title</title>
        <para>
            ptint "Python is a simple language to study"
        </para>
    </chapter>

    <chapter number="2">
        <title>socket programming</title>
        <para>
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        </para>
    </chapter>
</book>



一个简单的解析方法

#!/usr/bin/python
#-*- coding:utf8 -*-

#首先利用dom树进行简单的解析
from xml.dom import minidom
from xml.dom import Node


def scannode(node, level=0):
    """Print an indented outline of the DOM subtree rooted at ``node``.

    One line is printed per node: the node's class name and, for element
    nodes, its tag name. Each nesting level adds three spaces of indent.

    Args:
        node: An ``xml.dom`` node (for example the document returned by
            ``minidom.parse``).
        level: Current nesting depth; callers normally leave it at 0.
    """
    msg = node.__class__.__name__
    if node.nodeType == Node.ELEMENT_NODE:
        msg += ', tag: ' + node.tagName
    # A single pre-formatted argument prints the same text under both the
    # Python 2 print statement and the Python 3 print function.
    print("%s %s" % (" " * (level * 3), msg))
    # hasChildNodes is a method; without the call the test is always true.
    if node.hasChildNodes():
        for child in node.childNodes:
            scannode(child, level + 1)

# Parse test.xml from the current working directory into a DOM document.
doc = minidom.parse('test.xml')
# Dump the whole tree, starting from the document node at indent level 0.
scannode(doc)

#这种方法虽然简单，但是表现确实不尽如人意，现在我们自己手动解析



#!/usr/bin/python
#-*- coding:utf8 -*-


#这里的解析主要是针对test.xml文档进行解析
#其他的文档只要稍微改改就行

from xml.dom import minidom
from xml.dom import Node
import re
import textwrap

class XmlScanner:
    """Walk a parsed book document and print a human-readable summary.

    The scanner looks for a top-level ``book`` element and prints its title,
    author (name and affiliation) and every chapter (number, title and
    paragraphs). All work happens in the constructor; output goes to stdout.

    Every ``print`` call receives one pre-formatted string so the class
    produces the same output under Python 2 and Python 3.
    """

    def __init__(self, doc):
        """Scan ``doc`` (a DOM document) and print every ``book`` found."""
        for child in doc.childNodes:
            if child.nodeType == Node.ELEMENT_NODE and child.tagName == 'book':
                self.handleBook(child)

    def handleBook(self, node):
        """Dispatch the element children of a ``book`` element."""
        for child in node.childNodes:
            if child.nodeType != Node.ELEMENT_NODE:
                # Skip whitespace text, comments, etc. between elements.
                continue
            if child.tagName == 'title':
                print("Book title is:  %s" % self.getText(child.childNodes))
            elif child.tagName == 'author':
                self.handleAuthor(child)
            elif child.tagName == 'chapter':
                self.handleChapter(child)

    def handleAuthor(self, node):
        """Print the name and affiliation found under an ``author`` element."""
        for child in node.childNodes:
            if child.nodeType != Node.ELEMENT_NODE:
                continue
            if child.tagName == 'name':
                self.handleAuthorName(child)
            elif child.tagName == 'affiliation':
                print("Author affiliation:  %s" % self.getText([child]))

    def handleAuthorName(self, node):
        """Print the first and last name found under a ``name`` element."""
        firstname = self.getText(node.getElementsByTagName('firstname'))
        lastname = self.getText(node.getElementsByTagName('lastname'))
        print("Author name: %s %s" % (firstname, lastname))

    def handleChapter(self, node):
        """Print a chapter header followed by each of its paragraphs."""
        print("**************Chapter****************")
        print("number: %s:   %s" % (
            node.getAttribute('number'),
            self.getText(node.getElementsByTagName('title')),
        ))
        for child in node.childNodes:
            if child.nodeType != Node.ELEMENT_NODE:
                continue
            if child.tagName == 'para':
                self.handlePara(child)

    def handlePara(self, node):
        """Print the text of a ``para`` element, wrapped by ``textwrap.fill``."""
        print(textwrap.fill(self.getText([node])))
        # Explicit empty string: a bare ``print`` is a no-op on Python 3.
        print("")

    def getText(self, nodeList):
        """Return the text contained in ``nodeList`` with whitespace collapsed.

        Text and CDATA nodes contribute their own data; any other node that
        has children is searched recursively. Every run of whitespace in the
        result is replaced by a single space.

        Args:
            nodeList: An iterable of DOM nodes.

        Returns:
            The concatenated, whitespace-normalised text.
        """
        parts = []
        for node in nodeList:
            if node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
                # Use the node's own data. ``wholeText`` also pulls in the
                # adjacent text/CDATA siblings, so visiting each sibling in
                # turn would repeat the same text several times.
                parts.append(node.data)
            elif node.hasChildNodes():
                parts.append(self.getText(node.childNodes))
        return re.sub(r'\s+', ' ', ''.join(parts))
        

class App:
    """Parse an XML file and print its summary through ``XmlScanner``."""

    def __init__(self, path='test.xml'):
        """Parse ``path`` and scan the resulting document.

        Args:
            path: File name (or file object) accepted by ``minidom.parse``.
                Defaults to ``'test.xml'`` in the current working directory.
        """
        doc = minidom.parse(path)
        XmlScanner(doc)

# Constructing App parses test.xml and prints the summary as a side effect.
app = App()
       



利用代码生成xml

#!/usr/bin/python
#-*- coding:utf8 -*-

#这里有一个生成xml文档的示例
from xml.dom import minidom
from xml.dom import Node

class CreateDom:
    """Build a small ``book`` XML document in memory and print it.

    Call ``Show()`` to build the complete tree and print it, or call the
    ``create*`` methods yourself in the order book, title, author, chapter.
    """

    def __init__(self):
        """Create an empty document that starts with a comment node."""
        self.doc = minidom.Document()
        self.doc.appendChild(self.doc.createComment("this is a simple test from jack"))

    def createBook(self):
        """Create the root ``book`` element and attach it to the document."""
        self.book = self.doc.createElement('book')
        self.doc.appendChild(self.book)

    def createTitle(self):
        """Add the book ``title`` element; requires ``createBook`` first."""
        self.title = self.doc.createElement('title')
        self.title.appendChild(self.doc.createTextNode('python skills'))
        self.book.appendChild(self.title)

    def createAuthor(self):
        """Add the ``author`` element with its name and affiliation."""
        self.author = self.doc.createElement('author')
        self.book.appendChild(self.author)

        self.name = self.doc.createElement('name')
        self.author.appendChild(self.name)
        self.firstname = self.doc.createElement('firstname')
        self.lastname = self.doc.createElement('lastname')
        self.firstname.appendChild(self.doc.createTextNode('Jack'))
        self.lastname.appendChild(self.doc.createTextNode('Li'))
        self.name.appendChild(self.firstname)
        self.name.appendChild(self.lastname)

        # The attribute keeps its original (misspelled) name so existing
        # callers still find it; only the emitted tag name is corrected.
        self.appilication = self.doc.createElement('affiliation')
        self.author.appendChild(self.appilication)
        self.appilication.appendChild(self.doc.createTextNode('Smith, Tone'))

    def createChapter(self):
        """Add the two sample chapters; ``self.chapter`` is the first one."""
        self.chapter = self._appendChapter(
            '1',
            'Simple title',
            "ptint 'Python is a simple language to study' ",
        )
        self._appendChapter(
            '2',
            'socket programming',
            'sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)',
        )

    def _appendChapter(self, number, title, para):
        """Append one ``chapter`` (number attribute, title, para) to the book."""
        chapter = self.doc.createElement('chapter')
        self.book.appendChild(chapter)
        chapter.setAttribute('number', number)

        titleNode = self.doc.createElement('title')
        chapter.appendChild(titleNode)
        titleNode.appendChild(self.doc.createTextNode(title))

        paraNode = self.doc.createElement('para')
        chapter.appendChild(paraNode)
        paraNode.appendChild(self.doc.createTextNode(para))
        return chapter

    def Show(self):
        """Build the whole document and print it as indented XML."""
        self.createBook()
        self.createTitle()
        self.createAuthor()
        self.createChapter()
        # One argument in parentheses behaves the same on Python 2 and 3.
        print(self.doc.toprettyxml(indent=' '))
       

# Build the sample document and print it to stdout.
app = CreateDom()
app.Show()



  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值