import requests
from bs4 import BeautifulSoup
# Scrapes the short comments of Douban movie subject 30176393: fetches the
# comment pages one by one, appends each comment's text to a local file and
# prints the list of comments found on each page.

COMMENTS_URL = 'https://movie.douban.com/subject/30176393/comments?'
# Browser-like User-Agent sent with every request.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36"
}
OUTPUT_FILE = 'movies_data.txt'
PAGE_SIZE = 20          # comments requested per page (the `limit` parameter)
PAGE_COUNT = 10         # number of pages fetched per run
REQUEST_TIMEOUT = 10    # seconds; without a timeout requests.get can block forever


def build_page_url(page):
    """Return the comments URL for the zero-based page index *page*.

    The `start` query parameter is the offset of the first comment on the
    page, i.e. ``page * PAGE_SIZE``.
    """
    return COMMENTS_URL + 'start={}&limit={}&sort=new_score&status=P'.format(
        page * PAGE_SIZE, PAGE_SIZE
    )


def extract_comments(soup):
    """Return the text of the first ``<p>`` inside every ``div.comment``.

    *soup* is a parsed page (anything exposing ``find_all``). Divs without a
    ``<p>`` child are skipped.
    """
    texts = []
    for block in soup.find_all('div', class_='comment'):
        paragraph = block.find('p')
        # find() returns None when there is no <p>; checking `.text` for None
        # (as the code did before) could never catch that case and raised
        # AttributeError instead.
        if paragraph is None:
            continue
        texts.append(paragraph.text)
    return texts


def main():
    """Fetch every page, append the comment texts to OUTPUT_FILE, print them.

    Raises whatever ``requests.get`` raises on network failure or timeout.
    """
    # One handle for the whole run; `with` guarantees it is closed even when a
    # request or the parser raises part-way through.
    with open(file=OUTPUT_FILE, mode='a', encoding='utf-8') as out:
        for page in range(PAGE_COUNT):
            res = requests.get(
                url=build_page_url(page),
                headers=HEADERS,
                timeout=REQUEST_TIMEOUT,
            )
            # NOTE: the HTTP status is not checked; an error page simply
            # yields no comments for that page.
            # Parse the static page.
            soup = BeautifulSoup(res.text, 'lxml')
            comment_list = extract_comments(soup)
            # Texts are written back-to-back, exactly as extracted.
            out.writelines(comment_list)
            print(comment_list)


if __name__ == '__main__':
    main()