下面贴上源代码,请问为什么爬虫的结果为 null?
# 通过定义函数的方式抓取豆瓣 Top250 电影信息,使整体结构更加清晰
import requests # 导入网页请求库
from bs4 import BeautifulSoup # 导入网页解析库
import json
# 用于发送请求,获取网页源代码以供解析
def start_requests(url, headers):
    """Download a page and return its raw response body.

    Args:
        url: Address of the page to fetch.
        headers: Mapping of HTTP request headers to send with the request.

    Returns:
        The response body as bytes (``Response.content``).
    """
    # The second positional parameter of ``requests.get`` is ``params`` (the
    # query string), so the headers must be passed by keyword; passed
    # positionally they are never sent as HTTP headers.
    response = requests.get(url, headers=headers)
    return response.content
# 接收网页源代码,解析出需要的信息
def parse(text):
soup = BeautifulSoup(text, ‘html.parser’)
movie_list = soup.find_all(‘div’, class_=‘item’)
result_list = []
for movie in movie_list:
mydict = {} # 创建一个列表存储所有结果
mydict['title'] = movie.find('span', class_='title').text
mydict['score'] = movie.find('span', class_='rating_num').text
mydict['quote'] = movie.find('span', class_='inq').text
star = movie.find('div', class_='star