import requests
from lxml import etree
def get_source(url, timeout=10):
    """Download *url* and return the response body as text.

    The request carries a desktop-browser ``User-Agent`` and a ``Referer``
    pointing at the first board page (presumably so the site serves the
    regular page instead of rejecting a scripted client -- confirm against
    the site's current behaviour).

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block forever on a stalled connection.

    Returns:
        The decoded response body (``response.text``).

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36',
        'Referer': 'https://maoyan.com/board/4?offset=0',
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    return response.text
def get_html(response):
    """Parse a board page and extract the per-movie text fields.

    The HTML is parsed with ``lxml`` and three XPath queries are run, one
    per field. Each query returns a list of text nodes, one entry per
    matching element on the page; the raw text is kept as-is (no
    whitespace stripping).

    Args:
        response: HTML source of the page, as a string.

    Returns:
        A dict with the keys ``'name'``, ``'star'`` and ``'time'``, each
        mapped to the list of text nodes found for that field (presumably
        the title, cast and release-date lines -- verify against the page
        markup). The dict is also printed, as before.
    """
    html = etree.HTML(response)

    # Every field lives under the same per-movie container; only the
    # trailing <p> selector differs.
    item = '//*[@id="app"]/div/div/div[1]/dl/dd/div/div/div[1]'
    movie = {
        'name': html.xpath(item + '/p[1]/a/text()'),
        'star': html.xpath(item + '/p[2]/text()'),
        'time': html.xpath(item + '/p[3]/text()'),
    }
    print(movie)
    # Return the result too, so callers can use it instead of re-parsing.
    return movie
if __name__ == '__main__':
    # Walk the board ten entries at a time: offsets 0, 10, ..., 90.
    board_url = 'https://maoyan.com/board/4?offset='
    for offset in range(0, 100, 10):
        page_source = get_source(board_url + str(offset))
        get_html(page_source)