# Import modules
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# Scrape the page content
# Scrape every page of the quotes site, printing "author : quote" for each
# quote, and follow the "next" pagination link until there is none left.

# Start page of the site to scrape.
star_url = 'http://quotes.toscrape.com'
# The first page to fetch is the start page itself.
next_pag_url = star_url
# Keep fetching pages until a page without a "next" link is reached.
while next_pag_url:
    # A timeout keeps the script from hanging forever on a stalled connection.
    url = requests.get(next_pag_url, timeout=10)
    url.raise_for_status()
    bs = BeautifulSoup(url.text, 'html.parser')

    # Extract and print the quotes of the current page BEFORE looking for the
    # next-page link, so the final page (which has no "next" button) is still
    # printed instead of being skipped by the break below.
    div_list = bs.find_all('div', class_='quote')
    for div in div_list:
        print(div.small.text, ':', div.span.text)

    # Find the "next page" button; its absence means this was the last page.
    next_pag = bs.find('li', class_='next')
    if not next_pag:
        break

    # Resolve the link against the current page URL so that both relative and
    # root-absolute hrefs produce a complete URL.
    next_pag_url = urljoin(next_pag_url, next_pag.a['href'])