import requests
from bs4 import BeautifulSoup
import re
import xlwt
def getContent(row):  # row: offset of the first entry on the requested page
    headers1 = {'User-Agent': 'Mozilla/4.0', 'content-type': 'text/html; charset=utf-8'}
    r = requests.get("https://movie.douban.com/top250?start=" + str(row) + "&filter=", headers=headers1)
    html = r.text
    soup = BeautifulSoup(html, "html.parser")
    # print(html)
    return soup
films = []  # accumulated results; one [title, rating] list per film is appended here
# getContent(0)
def getItem(row):
    soup = getContent(row)
    grid_view = soup.find("ol", attrs={"class": "grid_view"})
    # print(grid_view)  # the whole <ol> list for this page
    # walk the <li> children of grid_view
    items = grid_view.find_all("li")  # all <li> entries
    # print(items[0])  # first <li>: The Shawshank Redemption
    for li in items:
        film = []
        # print(li.find("span", attrs={"class": "title"}).text)  # all titles
        title = li.find("span", attrs={"class": "title"}).getText()
        # print(title)
        # append the film title
        film.append(title)
        rating_num = li.find("span", attrs={"class": "rating_num"}).getText()
        # append the rating and collect the entry
        film.append(rating_num)
        films.append(film)
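
The snippet above stops before the Excel step. Given the xlwt import at the top and the goal of saving the Top 250 list to Excel, a minimal sketch of the remaining flow might look like the following; the sheet name, column layout, and output filename are assumptions, not part of the original code.

# Sketch (assumption): crawl all 10 pages, then dump the films list to an .xls file with xlwt
def saveToExcel(filename):
    workbook = xlwt.Workbook(encoding="utf-8")
    sheet = workbook.add_sheet("Top250")   # sheet name is an assumption
    sheet.write(0, 0, "title")             # header row
    sheet.write(0, 1, "rating")
    for i, film in enumerate(films, start=1):   # one row per film: [title, rating]
        for j, value in enumerate(film):
            sheet.write(i, j, value)
    workbook.save(filename)

for start in range(0, 250, 25):   # Douban Top 250: 10 pages, 25 entries per page
    getItem(start)
saveToExcel("douban_top250.xls")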