def parse_index(html):
    """Return the absolute URL of every film link found in an index page.

    Args:
        html: Markup of the index page, as a ``str``.

    Returns:
        A list of ``http://www.movie.com/films/<id>`` strings, in the order
        the links appear in ``html``. The list is empty when nothing matches.
    """
    # NOTE(review): the attribute names (``data_get``, ``data-vel``) are
    # copied from the original pattern, which had been mangled by a rich-text
    # paste (typographic quotes, missing separators) -- confirm them against
    # the real page markup. ``\s*`` tolerates zero or more blanks between
    # attributes, and ``_?`` accepts both ``blank`` and ``_blank`` in case the
    # paste dropped a leading underscore.
    link_pattern = (
        r'<a href="(/films/\d+)"'
        r'\s*target="_?blank"'
        r'\s*data_get="movies-click"'
        r'\s*data-vel="\{movieId:\d+\}"'
        r'\s*class="">.+'
    )
    film_paths = re.findall(link_pattern, html)
    return ["http://www.movie.com{}".format(path) for path in film_paths]
# parse_info(html): the visible code ends by returning a dict with the keys
# "name", "types" and "actor", so it appears intended to pull those three
# fields out of a film detail page.
#
# NOTE(review): this block is not valid Python as it stands and cannot be
# repaired from the text alone:
#   * The regex literals for `name` and `types` open with a typographic quote
#     and are never closed; only the `(.+)` capture group of each pattern
#     survives, so the tags they were meant to match are unknown.
#   * The `actors` regex is truncated as well and runs straight into a second,
#     XPath-based assignment to `actors` on the same line.
#   * `e,xpath(...)` uses a comma where attribute access was presumably meant,
#     and `e` is not assigned anywhere in this block.
#   * The second XPath predicate reads `['class = "info"]` (stray quote),
#     whereas the first one uses `@class`.
#   * `format_actor( actor)` and the returned dict both use `actor`, but the
#     name assigned above is `actors`.
#   * `format_actor` is not defined in this block -- presumably a helper
#     declared elsewhere; verify.
# Restore the function from the original, un-rendered source before use.
def parse_info(html):
name = re.findall(r’
(.+)
types = re.findall(r’
- (.+)
actors = re.findall(r’<li class = “celebrity actor”.+>\s+<a self = "/films/cel.+>\s+.htmlactors = e,xpath('//li[@class = "celebrity actor"]/div['class = "info"]/a/text()') actors = format_actor( actor) return{ "name":name, "types":types, "actor":actor }