from html.parser import HTMLParser

# NOTE(review): "pyquary"/"PyQuary" and the alias "py" look like misspellings of
# pyquery / PyQuery / pq -- confirm and correct before relying on this import.
from pyquary import PyQuary as py
class _MovieLinkCollector(HTMLParser):
    """Collect ``href`` values of ``<a>`` tags nested inside movie-title ``<div>``s.

    A ``<div>`` counts as a movie-title container when its ``class`` attribute
    contains both ``channel-detail`` and ``movie-item-title`` (in any order).
    """

    _TARGET_CLASSES = frozenset({"channel-detail", "movie-item-title"})

    def __init__(self):
        super().__init__(convert_charrefs=True)
        self.hrefs = []
        # One flag per currently open <div>; True marks a movie-title container.
        self._open_divs = []

    def handle_starttag(self, tag, attrs):
        attr_map = dict(attrs)
        if tag == "div":
            # A valueless ``class`` attribute is reported as None, hence ``or ""``.
            classes = (attr_map.get("class") or "").split()
            self._open_divs.append(self._TARGET_CLASSES.issubset(classes))
        elif tag == "a" and any(self._open_divs):
            href = attr_map.get("href")
            if href:  # anchors without a usable href are skipped, not an error
                self.hrefs.append(href)

    def handle_endtag(self, tag):
        # Guard against stray closing tags in malformed markup.
        if tag == "div" and self._open_divs:
            self._open_divs.pop()


def parse_index(html, base_url="http://maoyan.com"):
    """Return the absolute detail-page URLs of the movies listed in *html*.

    The original body parsed the page twice (once with a CSS selector, once
    with XPath) and kept only the second result; both attempts were also
    syntactically broken. This version does one pass with the standard-library
    HTML parser and needs no third-party import.

    NOTE(review): the original spelled the container class ``channel-datail``;
    this assumes the intended class is ``channel-detail`` -- confirm against
    the live page markup.

    Args:
        html: Markup of the index page, as ``str`` or UTF-8 encoded ``bytes``.
        base_url: Prefix placed in front of every extracted ``href``.

    Returns:
        A list of ``base_url + href`` strings in document order; empty when
        *html* is empty or contains no matching links.
    """
    if not html:
        return []
    if isinstance(html, bytes):
        html = html.decode("utf-8", errors="replace")

    collector = _MovieLinkCollector()
    collector.feed(html)
    collector.close()
    return [f"{base_url}{href}" for href in collector.hrefs]
def parse_info(html):
soup = BeautifulSoup(html, ‘lxml’)
name = soup.selete(‘h3.name’)[0].text
types = soup.selete(‘li.ellipsis’)[0].text
actors = soup.selete(‘li.celebrity.actor > div.info > a’)