#!/usr/bin/env python
#coding:utf-8
import htmllib,urllib2
import formatter,StringIO
import urllib
class TrackingParser(htmllib.HTMLParser):
"""Try to keep accurate pointer of parsing location."""
def __init__(self, writer, *args):
htmllib.HTMLParser.__init__(self, *args)
self.writer = writer
def parse_starttag(self, i):
index = htmllib.HTMLParser.parse_starttag(self, i)
self.writer.index = index
print 'vvvvvvvvvvvvvvvvvvvvvvv\n'
print index
return index
def parse_endtag(self, i):
print 'vvvvvvvvvvvvvvvvvvvvvvv\n'
self.writer.index = i
return htmllib.HTMLParser.parse_endtag(self, i)
class Paragraph:
    """Plain record holding three fields, all created empty / zero.

    Attributes set by the constructor:
    text    -- starts as the empty string.
    bytes   -- starts as the integer 0.
    density -- starts as the float 0.0.

    NOTE(review): presumably these hold a paragraph's extracted text, its
    size in bytes and a text-density score -- confirm against the code
    that fills the record in.
    """

    def __init__(self):
        # One tuple assignment; the fields are still bound in the order
        # text, bytes, density.
        self.text, self.bytes, self.density = '', 0, 0.0
class LineWriter(formatter.AbstractWriter):
def __init__(self, *args):
self
python 爬取网页正文
最新推荐文章于 2024-04-27 16:08:02 发布