废话不多说，直接上代码：
import requests
from bs4 import BeautifulSoup
import csv
import time
def crawl_douban_movie_top_250():
base_url = 'https://movie.douban.com/top250?start='
headers = { # 用户代理(User-Agent)头部信息,模拟真实的浏览器请求
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36'
}
movie_list = []
for i in range(10):
url = base_url + str(i * 25)
response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.text, 'html.parser')
movie_items = soup.select('.item')
for movie_item in movie_items:
title = movie_item.select_one('.title').text.strip()
info = movie_item.select_one('.bd p').text.strip()
rating = movie_item.select_on

最低0.47元/天 解锁文章
1674

被折叠的 条评论
为什么被折叠?



