# Parse the HTML and extract the data we want.
from HTMLParser import HTMLParser
import urllib
class MyHTMLParser(HTMLParser):
    """Collect event title, time and location text while parsing HTML.

    A new event is started each time an ``<h3>`` tag carrying the attribute
    ``class="event-title"`` is seen.  Events are numbered from 1 in the order
    they start.  For each event the parser records the first text chunk that
    follows:

    * the ``<h3 class="event-title">`` tag, under the key ``'even-title'``,
    * a ``<time>`` tag, under the key ``'time'``,
    * a ``<span class="event-location">`` tag, under ``'event-location'``.

    Text that follows a ``<time>`` or location ``<span>`` before any event
    has been started is ignored instead of raising ``KeyError``.
    """

    def __init__(self):
        HTMLParser.__init__(self)
        self._count = 0        # number of events started so far
        self._events = dict()  # event number -> {field key: text}
        self._flag = None      # field key the next text chunk belongs to

    # Start tag
    def handle_starttag(self, tag, attrs):
        """Note which field, if any, the next text chunk should fill.

        ``attrs`` is the list of ``(name, value)`` pairs for the tag; the
        class attribute has to match exactly.
        """
        if tag == 'h3' and ('class', 'event-title') in attrs:
            # The 'even-title' spelling is the key exposed in the result
            # dicts, so it is kept unchanged.
            self._flag = 'even-title'
            self._count += 1
            self._events[self._count] = dict()
        if tag == 'time':
            self._flag = 'time'
        if tag == 'span' and ('class', 'event-location') in attrs:
            self._flag = 'event-location'

    # Text inside a tag
    def handle_data(self, data):
        """Store ``data`` in the current event under the pending field key.

        The pending field is cleared after every text chunk, so only the
        first chunk following a recognised start tag is recorded.
        """
        field = self._flag
        self._flag = None
        if field not in ('even-title', 'time', 'event-location'):
            return
        # No event exists until the first event-title <h3> has been seen;
        # drop the text rather than fail on the missing key.
        event = self._events.get(self._count)
        if event is None:
            return
        event[field] = data

    def get_events(self):
        """Return the dict mapping event number to its collected fields."""
        return self._events
# Download the events page, run it through MyHTMLParser and print the
# collected events.  An IOError from the download is printed, not raised.
page = None  # pre-bound so the cleanup below is safe when urlopen() fails
try:
    page = urllib.urlopen('https://www.python.org/events/python-events/')
    html = page.read()
except IOError as e:
    print(e)
else:
    # Only parse when the download succeeded.
    parser = MyHTMLParser()
    parser.feed(html)
    events = parser.get_events()
    print(events)
finally:
    # Close the response whether or not reading/parsing succeeded.
    if page is not None:
        page.close()