import requests
from bs4 import BeautifulSoup
# Request the Douban URL, sending a modified set of headers.
def open_url(url, timeout=10):
    """Fetch *url* with a browser-like User-Agent header.

    Args:
        url: Address to request with HTTP GET.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``; without it a stalled connection
            would block the caller indefinitely.

    Returns:
        The response object returned by ``requests.get``. The status code
        is not checked here; callers inspect it as needed.
    """
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36',
    }
    return requests.get(url, headers=headers, timeout=timeout)
# Get the total number of pages in the Top 250 movie list.
def find_pages(res):
    """Return the page count read from the markup in ``res.text``.

    Args:
        res: Object exposing the page HTML through a ``text`` attribute.

    Returns:
        The integer parsed from the text of the node located two siblings
        before the first ``<span class="next">`` element.
    """
    document = BeautifulSoup(res.text, "html.parser")
    next_marker = document.find("span", class_="next")
    # Step back two siblings from the "next" marker.
    # NOTE(review): presumably the first step skips a whitespace text node and
    # the second lands on the last page-number element - confirm against the
    # live page markup.
    last_page_node = next_marker.previous_sibling.previous_sibling
    return int(last_page_node.text)
# Collect information about the Top 250 movies.
def find_movies(res):
result=[]
soup = BeautifulSoup(res.text, "html.parser")
# 收集电影名字
movie_name=[]
targets = soup.find_all("div", class_="hd")
for each in targets:
movie_name.append(each.a.span.text)
# 收集评分
grade=[]
targets = soup.find_all("span", class_="
爬虫爬取豆瓣Top250电影 生成本地.txt文件
最新推荐文章于 2022-11-08 14:35:55 发布