python xml解析html_python解析html/xml

解析html

from HTMLParser import HTMLParser

import sys

class TestParser(HTMLParser):
    """Collect the text found inside the <title> and <body> elements.

    Feed markup with ``feed()``; the accumulated text is then available
    through ``gettitle()`` and ``getbody()`` (or directly through the
    ``title`` and ``body`` attributes).
    """

    def __init__(self):
        # Explicit base-class call instead of super(): it works regardless
        # of which module the HTMLParser base class was imported from.
        HTMLParser.__init__(self)
        self.title = ''
        self.readingtitle = False  # True while between <title> and </title>
        self.body = ''
        self.readingbody = False   # True while between <body> and </body>

    def handle_starttag(self, tag, attrs):
        """Start collecting text when a <title> or <body> tag opens."""
        # NOTE(review): the original appended an empty string to self.body
        # for every start tag seen inside <body>; that was a no-op and has
        # been dropped. It may have been markup lost from the listing --
        # confirm whether nested tags were meant to be echoed into body.
        if tag == 'title':
            self.readingtitle = True
        elif tag == 'body':
            self.readingbody = True

    def handle_data(self, data):
        """Append text to the title or body, depending on the open element."""
        # Title text takes precedence over body text when both flags are set.
        if self.readingtitle:
            self.title += data
        elif self.readingbody:
            self.body += data

    def handle_endtag(self, tag):
        """Stop collecting text when </title> or </body> is reached."""
        if tag == 'title':
            self.readingtitle = False
        elif tag == 'body':
            self.readingbody = False

    def gettitle(self):
        """Return the text collected from the <title> element."""
        return self.title

    def getbody(self):
        """Return the text collected from the <body> element."""
        return self.body

# Usage: testparser.py [test.html]
# Parse the file named on the command line, defaulting to "test.html".
path = sys.argv[1] if len(sys.argv) > 1 else "test.html"

# The context manager closes the file even if read() raises.
with open(path) as fd:
    markup = fd.read()

tp = TestParser()
tp.feed(markup)

# Single-argument print(...) yields the same output on Python 2 and 3.
print("Title is: " + tp.gettitle())
print("Body is: " + tp.getbody())

对于不严格的html(比如缺少关闭tag),可以使用TidyLib。

test.html

<html>
<head>
<title>Document Title</title>
</head>
<body>
This is a text
</body>
</html>

输出

>>>

Title is: Document Title

Body is:

This is a text

解析xml

from xml.dom import minidom, Node

import sys

def scanNode(node, level=0):
    """Print one line per DOM node, depth-first, prefixed with its depth.

    Each line has the form "<level> <node class name>"; element nodes
    additionally get ", tag: <tagName>" appended.

    Args:
        node: DOM node to start from (for example a minidom Document).
        level: Depth printed for ``node``; children are printed with
            ``level + 1``.
    """
    msg = node.__class__.__name__
    if node.nodeType == Node.ELEMENT_NODE:
        msg += ", tag: " + node.tagName
    # Formatting into one string keeps the output identical whether print
    # is a statement or a function.
    print("%s %s" % (level, msg))

    # hasChildNodes is a method; the original referenced it without calling
    # it, so the condition was always true.
    if node.hasChildNodes():
        for child in node.childNodes:
            scanNode(child, level + 1)

# Usage: testparser.py [test.xml]
# Parse the file named on the command line, defaulting to "test.xml".
xml_path = sys.argv[1] if len(sys.argv) > 1 else "test.xml"

doc = minidom.parse(xml_path)
scanNode(doc)

# Contents of test.xml:

<books>
    <book>
        <name>fengrufeitun</name>
        <price>12</price>
    </book>
</books>

输出

>>>

0 Document

1 Element, tag: books

2 Text

2 Element, tag: book

3 Text

3 Element, tag: name

4 Text

3 Text

3 Element, tag: price

4 Text

3 Text

2 Text

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
make /usr/bin/python ./c_client.py -p //usr/lib/python3.8/site-packages //usr/share/xcb/xproto.xml /usr/bin/python ./c_client.py -p //usr/lib/python3.8/site-packages //usr/share/xcb/bigreq.xml /usr/bin/python ./c_client.py -p //usr/lib/python3.8/site-packages //usr/share/xcb/xc_misc.xml /usr/bin/python ./c_client.py -p //usr/lib/python3.8/site-packages //usr/share/xcb/composite.xml /usr/bin/python ./c_client.py -p //usr/lib/python3.8/site-packages //usr/share/xcb/damage.xml /usr/bin/python ./c_client.py -p //usr/lib/python3.8/site-packages //usr/share/xcb/dpms.xml /usr/bin/python ./c_client.py -p //usr/lib/python3.8/site-packages //usr/share/xcb/dri2.xml /usr/bin/python ./c_client.py -p //usr/lib/python3.8/site-packages //usr/share/xcb/glx.xml /usr/bin/python ./c_client.py -p //usr/lib/python3.8/site-packages //usr/share/xcb/randr.xml /usr/bin/python ./c_client.py -p //usr/lib/python3.8/site-packages //usr/share/xcb/record.xml /usr/bin/python ./c_client.py -p //usr/lib/python3.8/site-packages //usr/share/xcb/render.xml /usr/bin/python ./c_client.py -p //usr/lib/python3.8/site-packages //usr/share/xcb/res.xml /usr/bin/python ./c_client.py -p //usr/lib/python3.8/site-packages //usr/share/xcb/screensaver.xml /usr/bin/python ./c_client.py -p //usr/lib/python3.8/site-packages //usr/share/xcb/shape.xml /usr/bin/python ./c_client.py -p //usr/lib/python3.8/site-packages //usr/share/xcb/shm.xml /usr/bin/python ./c_client.py -p //usr/lib/python3.8/site-packages //usr/share/xcb/sync.xml /usr/bin/python ./c_client.py -p //usr/lib/python3.8/site-packages //usr/share/xcb/xevie.xml /usr/bin/python ./c_client.py -p //usr/lib/python3.8/site-packages //usr/share/xcb/xf86dri.xml /usr/bin/python ./c_client.py -p //usr/lib/python3.8/site-packages //usr/share/xcb/xfixes.xml /usr/bin/python ./c_client.py -p //usr/lib/python3.8/site-packages //usr/share/xcb/xinerama.xml /usr/bin/python ./c_client.py -p //usr/lib/python3.8/site-packages //usr/share/xcb/xinput.xml Traceback (most recent 
call last): File "./c_client.py", line 1039, in <module> module.register() File "/usr/lib/python2.7/dist-packages/xcbgen/state.py", line 93, in register matcher.execute(self, self.namespace) File "/usr/lib/python2.7/dist-packages/xcbgen/matcher.py", line 115, in execute funcs[elt.tag](elt, module, namespace) KeyError: 'eventstruct' make: *** [Makefile:1018: xinput.c] Error 1
05-24

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值