代码区域:
import requests
from lxml import etree
class DouBan:
    """Scraper that prints the entries of the Douban Top 250 movie chart.

    ``urls`` holds one URL per chart page and ``get_movie_data`` downloads
    each page, extracts the fields of every listed movie and prints them.
    """

    # The chart has 250 entries shown 25 per page, so valid offsets are
    # 0, 25, ..., 225.  An upper bound of 251 would add a useless request for
    # start=250, which is past the last entry.
    urls = [
        'https://movie.douban.com/top250?start={}'.format(str(count))
        for count in range(0, 250, 25)
    ]

    # NOTE(review): Douban has been observed to reject the default
    # python-requests User-Agent (HTTP 418), so a browser-like one is sent.
    _HEADERS = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/120.0 Safari/537.36'
        ),
    }

    # Seconds to wait for the server; without it a stalled connection would
    # block the whole crawl indefinitely.
    _TIMEOUT = 10

    @staticmethod
    def _first_text(nodes, default=''):
        """Return the first XPath text result stripped, or *default* if none."""
        return nodes[0].strip() if nodes else default

    @staticmethod
    def _split_credits(line):
        """Split a '导演: ... 主演: ...' line into ``(director, protagonist)``.

        Non-breaking spaces are normalised first, then the line is cut at the
        '主演:' label, so the result does not depend on which kind (or how
        many) of space characters separate the two parts.  ``protagonist`` is
        an empty string when the label is absent.
        """
        text = line.replace('\xa0', ' ').strip()
        director_part, _, protagonist_part = text.partition('主演:')
        director = director_part.replace('导演:', '', 1).strip()
        return director, protagonist_part.strip()

    @classmethod
    def _parse_movie(cls, movie):
        """Extract the printed fields from one ``<li>`` element of the chart.

        Returns a tuple ``(title, director, protagonist, mark, star, quote)``.
        Any field whose node is missing is returned as an empty string rather
        than raising ``IndexError`` (e.g. entries without a one-line quote).
        """
        title = cls._first_text(movie.xpath('.//span[@class="title"]/text()'))
        msgs = movie.xpath('.//div[@class="bd"]/p[@class=""]/text()')
        director, protagonist = cls._split_credits(msgs[0]) if msgs else ('', '')
        # Second text node: year / country / genre; drop both regular and
        # non-breaking spaces so the value is compact.
        mark = (
            msgs[1].strip().replace('\xa0', '').replace(' ', '')
            if len(msgs) > 1 else ''
        )
        star = cls._first_text(movie.xpath('.//span[@class="rating_num"]/text()'))
        quote = cls._first_text(movie.xpath('.//span[@class="inq"]/text()'))
        return title, director, protagonist, mark, star, quote

    def get_movie_data(self):
        """Download every chart page and print one line per movie.

        Prints ``url, title, director, protagonist, mark, star, quote`` for
        each entry and returns ``None``.

        Raises:
            requests.RequestException: on connection errors, timeouts or a
                non-2xx HTTP status.
        """
        for url in self.urls:
            response = requests.get(url, headers=self._HEADERS, timeout=self._TIMEOUT)
            # Fail loudly on an error page instead of silently parsing it and
            # printing nothing.
            response.raise_for_status()
            html_dom = etree.HTML(response.text)
            if html_dom is None:
                # Nothing parseable in the response body; skip this page.
                continue
            for movie in html_dom.xpath('.//ol[@class="grid_view"]/li'):
                title, director, protagonist, mark, star, quote = self._parse_movie(movie)
                print(url, title, director, protagonist, mark, star, quote)
# Script entry: build a scraper instance and run the crawl, which prints one
# line per movie to stdout.  These statements execute at import time as well.
db = DouBan()
db.get_movie_data()
效果展示:
以后有需要时，可以把这些数据对接到前端进行展示；周末有空的时候，就可以照着这份列表挑选高质量的电影观看了。薯片 + 快乐水，enjoy！