# 爬取豆瓣电影中《我和我的家乡》的所有影评信息，并将其存入 csv 文件（文件应包含编号、影评文字、发布时间 3 项内容）
#!/usr/bin/python3
# author: kaqiulee
#coding :utf-8
import requests
from bs4 import BeautifulSoup
import pandas as pd
# Scrape the short comments of Douban movie subject 35051512
# ("My People, My Homeland") page by page and store them in a CSV file
# together with their publication time.

# Status code of the most recent response; paging stops once it is not 200.
status = 200
# Request headers: every request is sent with a desktop-browser User-Agent.
kv = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"}
page = 0   # value of the "start" query parameter (offset of the first comment)
lst1 = []  # publication times, one entry per collected comment
lst2 = []  # comment texts, aligned index-by-index with lst1

while True:
    url = "https://movie.douban.com/subject/35051512/comments?start=%s&limit=20&status=P&sort=new_score" % page
    try:
        # A timeout keeps a stalled connection from hanging the script forever.
        response = requests.get(url=url, headers=kv, timeout=10)
    except requests.RequestException as err:
        # Stop paging but fall through to the CSV export so the comments
        # collected so far are not lost.
        print("Request failed at start=%s: %s" % (page, err))
        break

    status = response.status_code
    if status != 200:
        # Do not try to parse an error page; a non-200 answer ends the crawl.
        break

    response.encoding = "utf-8"
    soup = BeautifulSoup(response.text, "html.parser")
    shortcomment = soup.find_all("span", {"class": "short"})
    date = soup.find_all("span", {"class": "comment-time"})

    if not shortcomment:
        # A 200 response without any comment means there is nothing left to
        # fetch; without this check the loop would keep requesting forever.
        break

    # Only keep pages where every comment has a matching timestamp; otherwise
    # texts and times could be paired incorrectly, so the page is skipped.
    if len(shortcomment) == len(date):
        for time_tag, comment_tag in zip(date, shortcomment):
            lst1.append(time_tag.get_text().replace("\n", "").strip())
            lst2.append(comment_tag.get_text().replace("\n", ""))

    page += 20  # each page holds 20 comments, so advance the offset by 20

# The DataFrame's default integer index is written as the first CSV column
# and serves as the running number of each comment.
df = pd.DataFrame({"时间": lst1, "评论": lst2})
df.to_csv("kaqiulee.csv", encoding="UTF-8")