#coding=utf-8
import urllib2
from HTMLParser import HTMLParser
class HttpParser(HTMLParser):
    """HTML parser that collects entries from ``<li>`` tags with a ``data-title``.

    Every ``<li>`` start tag whose ``data-title`` attribute is present and
    non-empty is recorded in ``self.move`` as a dict with two keys:
    ``'title'`` (the ``data-title`` value) and ``'rate'`` (the ``data-rate``
    value, or ``None`` when that attribute is absent).
    """

    def __init__(self):
        # Explicit base-class initialiser call, kept exactly as the file does it.
        HTMLParser.__init__(self)
        # Accumulated entries, in the order their tags were encountered.
        self.move = []

    def handle_starttag(self, tag, attrs):
        """Record a ``{'title', 'rate'}`` dict for each qualifying ``<li>`` tag.

        ``attrs`` is the sequence of ``(name, value)`` pairs supplied by the
        base parser for the start tag being processed.
        """
        if tag != 'li':
            return

        def first_value(wanted):
            # Value of the first attribute named `wanted`; None if there is none.
            return next((value for name, value in attrs if name == wanted), None)

        title = first_value('data-title')
        if not title:
            # A missing or empty data-title means this <li> is not an entry.
            return

        self.move.append({'title': title, 'rate': first_value('data-rate')})
def get(url):
    """Download *url* and return the entries found in its HTML.

    The page is fetched with ``urllib2.urlopen``, its body is fed to an
    ``HttpParser``, and the parser's ``move`` list is returned: one
    ``{'title': ..., 'rate': ...}`` dict per matching ``<li>`` tag.

    Any exception raised while opening or reading the URL propagates to the
    caller; the response is closed in every case.
    """
    response = urllib2.urlopen(url)
    try:
        html = response.read()
    finally:
        # Release the connection even when read() fails part-way through.
        response.close()

    parser = HttpParser()
    parser.feed(html)
    return parser.move
if __name__ == '__main__':
    import json

    # Scrape the Douban movie front page and print the entries as indented JSON.
    movies = get('https://movie.douban.com/')
    # ensure_ascii=False leaves non-ASCII characters unescaped in the output.
    print(json.dumps(movies, ensure_ascii=False, indent=2))