今天就用 XPath 解析方式来解析猫眼电影榜单页面。
内容很简单
代码如下:
import requests
from lxml import etree
# Address of the Maoyan board listing; individual pages are selected with
# the ``offset`` query parameter.
BASE_URL = 'https://www.maoyan.com/board/4'

# Browser-like User-Agent header sent with every request.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.55 Safari/537.36 Edg/96.0.1054.43'
}

PAGE_COUNT = 10       # number of listing pages requested per run
PAGE_SIZE = 10        # offset step between two consecutive pages
REQUEST_TIMEOUT = 2   # seconds to wait for each HTTP response
OUTPUT_PATH = '猫眼.txt'


def page_url(page):
    """Return the listing URL for the zero-based page index *page*."""
    return f'{BASE_URL}?offset={page * PAGE_SIZE}'


def fetch_page(page):
    """Download one listing page and return its HTML text.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-2xx HTTP status.
    """
    response = requests.get(page_url(page), headers=HEADERS, timeout=REQUEST_TIMEOUT)
    # Fail loudly on an HTTP error instead of parsing an error page.
    response.raise_for_status()
    return response.text


def parse_movies(html):
    """Extract ``(name, people)`` pairs from listing-page HTML.

    Names come from the ``title`` attribute of ``p.name > a`` and people from
    the text of ``p.star``. Both values are stripped of surrounding
    whitespace. Returns an empty list when *html* is blank or unparsable.
    """
    if not html or not html.strip():
        return []
    tree = etree.HTML(html)
    if tree is None:
        return []
    names = tree.xpath('//p[@class="name"]/a/@title')
    stars = tree.xpath('//p[@class="star"]/text()')
    # zip() stops at the shorter list, so an unmatched trailing entry is dropped.
    return [(name.strip(), star.strip()) for name, star in zip(names, stars)]


def format_record(name, people):
    """Return the single output line for one movie: ``"<name> <people>"``.

    Every run of whitespace (including embedded newlines) is collapsed to one
    space so that each record is guaranteed to occupy exactly one line.
    """
    return ' '.join(f'{name} {people}'.split())


def save_records(records, path=OUTPUT_PATH):
    """Append *records* to *path*, one per line.

    The file is written as UTF-8 explicitly; the platform default encoding
    may not be able to represent the text.
    """
    with open(path, 'a', encoding='utf-8') as f:
        f.writelines(f'{record}\n' for record in records)


def main():
    """Scrape every listing page, echo each movie, and append it to the output file."""
    for page in range(PAGE_COUNT):
        try:
            html = fetch_page(page)
        except requests.RequestException as exc:
            # One failed page should not abort the remaining pages.
            print(f'page {page} skipped: {exc}')
            continue

        records = []
        for name, people in parse_movies(html):
            print(name)
            print(people)
            records.append(format_record(name, people))

        # One file open per page rather than one per movie.
        if records:
            save_records(records)


if __name__ == '__main__':
    main()