import requests
import json
import csv
def movie_row(movie):
    """Flatten one movie record from the search response into a CSV row.

    Args:
        movie: A dict taken from the ``data`` list of the search response.
            The keys ``title``, ``directors``, ``rate``, ``url``, ``casts``,
            ``cover`` and ``id`` are read; any other keys are ignored.

    Returns:
        A list ``[title, directors, rate, url, casts, cover, id]`` where
        ``directors`` and ``casts`` are each collapsed into a single string.

    Raises:
        KeyError: If one of the expected keys is missing from ``movie``.
    """
    # Each element is converted to str and the pieces are concatenated with
    # no separator, which is the format the existing CSV output uses.
    directors = "".join(str(person) for person in movie['directors'])
    casts = "".join(str(person) for person in movie['casts'])
    return [
        movie['title'],
        directors,
        movie['rate'],
        movie['url'],
        casts,
        movie['cover'],
        movie['id'],
    ]


def scrape_movies(csv_path, headers, total=5000, page_size=20):
    """Download search result pages and write one CSV row per movie.

    Args:
        csv_path: Destination file; it is overwritten if it already exists.
        headers: HTTP headers sent with every request.
        total: Upper bound (exclusive) for the ``start`` offset.
        page_size: Step between consecutive ``start`` offsets.

    Raises:
        requests.HTTPError: If a page request returns an error status.
        KeyError: If a response has no ``data`` entry.
    """
    # The context manager guarantees the file is flushed and closed even if
    # a request or a parse step fails part-way through.
    with open(csv_path, 'w', encoding='utf-8-sig', newline='') as fp:
        csv_writer = csv.writer(fp)
        # No header row is written; the columns are:
        # title, directors, rating, url, casts, cover, id.
        for start in range(0, total, page_size):
            page_url = "https://movie.douban.com/j/new_search_subjects?sort=U&range=0,10&tags=&start=" + str(start)
            response = requests.get(page_url, timeout=30, headers=headers)
            # Fail with a clear HTTP error instead of a confusing JSON or
            # key error caused by parsing an error page.
            response.raise_for_status()
            movies = json.loads(response.text)['data']
            if not movies:
                # An empty page means there is nothing left to fetch.
                break
            # Iterate over what was actually returned rather than assuming
            # exactly page_size entries, so a short page cannot raise
            # IndexError.
            for movie in movies:
                row = movie_row(movie)
                print(*row)
                csv_writer.writerow(row)


if __name__ == "__main__":
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
    }
    # Offsets 0, 20, ..., 4980: up to 5000 movies in total.
    scrape_movies('D:/Python/爬虫/douban1.csv', headers)
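# Douban movie scraper (by category) that saves the results to a CSV file.
# Page residue from the source article: "latest recommended article published on 2023-11-11 16:21:43".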