代码
import requests
from bs4 import BeautifulSoup
import csv
# Number of entries per listing page.
# NOTE(review): not referenced by the code visible in this file — confirm before removing.
start = 25

# Scratch lists holding the links / titles of the most recently scraped page.
Movie_url = []
Movie_name = []

# Accumulated [title, link] rows across every scraped page.
top = []

# CSV header row (movie title, link).
head = ["电影名", "链接"]

# Request headers carrying a desktop-browser User-Agent string.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/94.0.4606.71 Safari/537.36"
    ),
}
def init(n):
    """Scrape one page of the Douban Top 250 list into the module-level lists.

    Args:
        n: Zero-based page index; the request is made with ``start=25*n``.

    Side effects:
        Replaces the contents of ``Movie_url`` and ``Movie_name`` with the
        links and titles found on this page, and appends one
        ``[title, link]`` row per entry to ``top``.

    Raises:
        requests.RequestException: If the request times out, the connection
            fails, or the server answers with an HTTP error status.
    """
    url = f"https://movie.douban.com/top250?start={25*n}&filter="
    # A timeout keeps a stalled connection from hanging the script forever.
    page = requests.get(url=url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page as if it
    # were a listing.
    page.raise_for_status()
    soup = BeautifulSoup(page.text, 'lxml')

    # Slice-assign so the shared module-level list objects are refreshed
    # in place (no ``global`` needed, names are never rebound).
    Movie_url[:] = [anchor['href'] for anchor in soup.select(".pic>a")]
    Movie_name[:] = [img['alt'] for img in soup.select(".pic>a>img")]

    # Pair whatever was actually found rather than assuming exactly 25
    # entries, so a short page cannot raise IndexError.
    top.extend([name, link] for name, link in zip(Movie_name, Movie_url))
if __name__=="__main__":
    # Scrape all 10 listing pages, then write the collected rows to Top250.csv.
    # An explicit encoding keeps the Chinese titles from depending on the
    # platform's default codec; the BOM written by "utf-8-sig" lets
    # spreadsheet tools detect UTF-8.
    with open("Top250.csv", 'w', newline='', encoding="utf-8-sig") as f:
        writer = csv.writer(f)
        writer.writerow(head)
        for page_index in range(10):
            init(page_index)
        # Iterate over the rows actually collected instead of assuming
        # exactly 250, so a short scrape cannot index past the end of `top`.
        for name, link in top:
            writer.writerow([name, link])
            print(f"爬取{name}成功")
效果图
![在这里插入图片描述](https://img-blog.csdnimg.cn/95e175b2144543d3932d873758371540.png?x-oss-process=image/watermark,type_ZHJvaWRzYW5zZmFsbGJhY2s,shadow_50,text_Q1NETiBAWWVzaXJfQw==,size_20,color_FFFFFF,t_70,g_se,x_16)