import requests
from bs4 import BeautifulSoup
import time
import pandas as pd
# Scrape the Douban Top 250 movie chart (name + average score) and save it
# to "1.csv". The site paginates 25 movies per page via the `start` query
# parameter: start=0, 25, ..., 225.

# Collected rows; each dict becomes one row of the output CSV.
datas = []

# Browser-like headers: Douban rejects the default `requests` User-Agent.
# NOTE(review): the cookie embeds a login session token — it will expire
# and should not be committed to version control.
kv={'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36',
'cookie':'bid=TJY9kh3SJWw; douban-fav-remind=1; ll="118281"; __utmc=30149280; dbcl2="202835581:GhhuJy32acY"; ck=NkN3; push_noty_num=0; push_doumail_num=0; __utma=30149280.620523441.1627457358.1640680787.1640708547.7; __utmb=30149280.0.10.1640708547; __utmz=30149280.1640708547.7.4.utmcsr=accounts.douban.com|utmccn=(referral)|utmcmd=referral|utmcct=/'}

page = 0
# The last real page is start=225; `page < 250` stops after it.
# (The original `page <= 250` made an extra request to the empty
# start=250 page — off-by-one.)
while page < 250:
    r = requests.get(
        'https://movie.douban.com/top250?start=' + str(page) + '&filter=',
        headers=kv,
    )
    page = page + 25
    if r.status_code == 200:
        soup = BeautifulSoup(r.text, 'html.parser')
        # Each movie entry lives in a <div class="item">.
        for i in soup.find_all('div', 'item'):
            datas.append({
                # First <span class="title"> is the Chinese title.
                'name': i.find_all('span', 'title')[0].string,
                # <span property="v:average"> holds the rating.
                'score': i.find_all(property="v:average")[0].string,
            })
        print("\n")
    else:
        print("目标网站不给予响应")
    # Be polite: pause between page requests to avoid being rate-limited.
    time.sleep(2)

print('OVER~')

# Persist to CSV. `utf_8_sig` writes a BOM so Excel opens the Chinese
# titles with the correct encoding.
df = pd.DataFrame(datas)
df.to_csv("1.csv", index=False, header=True, encoding='utf_8_sig')
# Source article: "Scrape the Douban Top 250 movies with bs4 and save to CSV"
# (latest recommended article published 2022-05-30 15:54:03)