# Scrape movie data from Douban Movies; specifically, the movie title and the review text.
import requests
import re
# Scrape the "best reviews" listing on Douban Movies and print, for every
# review found, a dict holding the movie/review title ("name") and the text
# captured after the spoiler notice ("comment").

# Number of listing pages to walk.
PAGE_COUNT = 5
# Step between successive ``start`` offsets (pages are requested at
# start=0, 20, 40, 60, 80).
PAGE_SIZE = 20
REVIEW_URL = "https://movie.douban.com/review/best/?start={}"
# A browser-like User-Agent is sent with every request.
REQUEST_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36",
}
# Seconds to wait for the server before giving up on a page; without a
# timeout a stalled connection would block the script forever.
REQUEST_TIMEOUT = 10

# Compiled once at import time instead of once per page.
# re.S lets ``.`` span newlines, since one review block covers many lines.
# NOTE(review): the pattern requires a literal space before ``<p class=`` and
# ends the lazy ``comment`` group at the first literal space after the
# spoiler notice -- confirm against the live markup that this is intended.
REVIEW_PATTERN = re.compile(r'<div data-cid=.*?<h2><a href=.*?>(?P<name>.*?)</a></h2>.*? <p class=.*?剧透</p>(?P<comment>.*?) ',re.S)


def extract_reviews(html):
    """Return the reviews found in one listing page.

    Args:
        html: Raw HTML text of a listing page.

    Returns:
        A list of ``{"name": ..., "comment": ...}`` dicts, one per match of
        ``REVIEW_PATTERN``, in document order.  ``comment`` has surrounding
        whitespace stripped.  The list is empty when nothing matches.
    """
    reviews = []
    for match in REVIEW_PATTERN.finditer(html):
        review = match.groupdict()
        review["comment"] = review["comment"].strip()
        reviews.append(review)
    return reviews


def fetch_page(start):
    """Download the listing page that begins at offset ``start``.

    Args:
        start: Value for the ``start`` query parameter.

    Returns:
        The response body as text.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
    """
    resp = requests.get(
        REVIEW_URL.format(start),
        headers=REQUEST_HEADERS,
        timeout=REQUEST_TIMEOUT,
    )
    # Do not try to parse an error page as if it were a listing.
    resp.raise_for_status()
    return resp.text


def main():
    """Fetch every listing page in turn and print each extracted review."""
    import sys

    for page in range(PAGE_COUNT):
        start = page * PAGE_SIZE
        try:
            html = fetch_page(start)
        except requests.RequestException as err:
            # Best effort: report the failed page and carry on with the rest.
            print("skipping start={}: {}".format(start, err), file=sys.stderr)
            continue
        for review in extract_reviews(html):
            print(review)


if __name__ == "__main__":
    main()