# Homework for 3.29
# Save the Top250 movie titles, ranks, ratings and other data to a CSV file.
import requests
from re import findall
import time
# Pause for two seconds once, at start-up, before any request is issued.
time.sleep(2)
import csv
# HTTP headers passed to the requests.get calls in this script: a desktop
# Chrome User-Agent plus a fixed cookie string.
# NOTE(review): the cookie looks like it was copied from a logged-in browser
# session (it carries dbcl2 / ck values) -- confirm it belongs in source.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/89.0.4389.90 Safari/537.36'
    ),
    'cookie': (
        'douban-fav-remind=1; '
        '__gads=ID=e02b9ca56bd825c9-2225b16e36c600b1:T=1614672190:RT=1614672190:S=ALNI_MZH-f-qOgk9zBFIGTUOXTth0CJBlA; '
        '__utmz=30149280.1614672194.2.1.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; '
        'bid=cr5TDfFbhxA; '
        '__utma=30149280.1855802012.1584338979.1614672194.1617009561.3; '
        '__utmb=30149280.0.10.1617009561; '
        '__utmc=30149280; '
        '__utma=223695111.1063035190.1584338980.1584338980.1617009561.2; '
        '__utmb=223695111.0.10.1617009561; '
        '__utmc=223695111; '
        '__utmz=223695111.1617009561.2.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); '
        '_pk_ses.100001.4cf6=*; '
        '__yadk_uid=4ne1Mmnpb4iOvmXQ6YoffVyS9seHiTl2; '
        'ap_v=0,6.0; '
        'dbcl2="141905784:lfiZNa11sdo"; '
        'ck=bhRE; '
        '_pk_id.100001.4cf6=fd6e4957fbf89dab.1584338979.2.1617010885.1584338979.; '
        'push_noty_num=0; '
        'push_doumail_num=0'
    ),
}
# Fetch the first page of the list and print both the raw HTML and the values
# captured from the <img width="100" alt="..."> tags.
# A timeout is passed because requests otherwise waits indefinitely for a
# server that never answers.
response = requests.get(
    'https://movie.douban.com/top250',
    headers=headers,
    timeout=10,
)
print(response.text)
result = findall(r'<img width="100" alt="(.+?)"', response.text)
print(result)
# Collect the alt-text names and the rating strings from every result page.
# The list holds 250 entries at 25 per page, so `start` takes the values
# 0, 25, ..., 225; a request for start=250 would only fetch an empty page.
list_names = []
list_scores = []
for start in range(0, 250, 25):
    response = requests.get(
        f'https://movie.douban.com/top250?start={start}&filter=',
        headers=headers,
        timeout=10,  # requests has no default timeout and could hang forever
    )
    # Fail loudly on an HTTP error: silently skipping a page would shift every
    # later name onto the wrong rank when the rows are numbered afterwards.
    response.raise_for_status()
    list_names.extend(
        findall(r'<img width="100" alt="(.+?)"', response.text)
    )
    list_scores.extend(
        findall(
            r'<span class="rating_num" property="v:average">(.+?)</span>',
            response.text,
        )
    )
# Build one [rank, name, score] row per movie. Ranks run from 1 to 250 and
# are matched positionally with the scraped names and scores; zip stops at
# the shortest input, so no row is ever left partially filled.
rank1 = list(range(1, 251))
result = [
    [rank, name, score]
    for rank, name, score in zip(rank1, list_names, list_scores)
]
# Write the header row followed by all data rows to ./rank.csv.
# The file is opened in 'w' mode so that each run replaces the previous
# contents; append mode added a second header and duplicate rows on every
# re-run. newline='' leaves line-ending handling to the csv module.
with open('./rank.csv', 'w', encoding='utf-8', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['排名', '电影名', '评分'])
    writer.writerows(result)