import requests, random, re, json
from fake_useragent import UserAgent
# https://catfront.dianping.com/api/batch?v=1&sdk=1.7.11
url = 'https://maoyan.com/board/7'
OUTPUT_PATH = 'maoyan.json'
# Seconds. requests applies no timeout by default, so without this a stalled
# connection would block the script forever.
REQUEST_TIMEOUT = 10

# Each ranked movie on the board page is wrapped in its own <dd>...</dd>.
FILM_PATTERN = re.compile(r'<dd>(.*?)</dd>', re.S)

# Capture groups, in order: rank, title, poster URL, cast line, release line,
# integer part of the score, fractional part of the score.
# Compiled once here instead of once per movie inside the loop.
DETAIL_PATTERN = re.compile(
    r'.*?board-index.*?>(\d+)</i>.*?<a.*?title="(.*?)".*?'
    r'<img data-src="(.*?)".*?'
    r'<p class="star">(.*?)</p>.*?'
    r'<p class="releasetime">(.*?)</p>.*?'
    r'<i class="integer">(.*?)</i>'
    r'<i class="fraction">(.*?)</i>.*?',
    re.S,
)


def fetch_board_html(board_url, timeout=REQUEST_TIMEOUT):
    """Download the board page and return its HTML as text.

    Args:
        board_url: Address of the board page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded response body.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or timeouts.
    """
    # A random User-Agent is sent with every run, as in the original script.
    headers = {'User-Agent': UserAgent().random}
    response = requests.get(board_url, headers=headers, timeout=timeout)
    # Fail loudly rather than parsing an error page into an empty list.
    response.raise_for_status()
    return response.text


def parse_movies(html):
    """Extract one record per movie from the board page HTML.

    Args:
        html: Full HTML text of the board page.

    Returns:
        A list of dicts with the keys 'rank', 'movie_name ', 'magor_actor',
        'image_url', 'show_time' and 'score' (all string values), in page
        order. ``<dd>`` entries that do not match the expected markup are
        skipped instead of producing empty records.
    """
    movies = []
    for film_html in FILM_PATTERN.findall(html):
        match = DETAIL_PATTERN.search(film_html)
        if match is None:
            continue
        rank, name, image_url, actors, show_time, integer, fraction = match.groups()
        # NOTE: the keys 'movie_name ' (trailing space) and 'magor_actor' are
        # kept exactly as before so existing readers of maoyan.json keep
        # working; rename them only together with those readers.
        movies.append({
            'rank': rank,
            'movie_name ': name,
            'magor_actor': actors.strip(),
            'image_url': image_url,
            'show_time': show_time,
            # The page splits the score into two <i> tags, e.g. "9." + "5".
            'score': integer + fraction,
        })
    return movies


def main():
    """Fetch the board, print the parsed movies and save them as JSON."""
    movie_list = parse_movies(fetch_board_html(url))
    print(movie_list)
    with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
        # ensure_ascii=False keeps non-ASCII titles readable in the file.
        json.dump(movie_list, f, ensure_ascii=False)


if __name__ == '__main__':
    main()
# 爬虫-猫眼电影 (Web scraper: Maoyan movies)
# 最新推荐文章于 2024-03-22 11:17:40 发布 (Latest recommended article published 2024-03-22 11:17:40)