import requests
from bs4 import BeautifulSoup
# Fetch a Douban book comments page and print the text of every short comment.

# Browser-like User-Agent header sent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3676.400 QQBrowser/10.5.3738.400',
}
# Comments page of the book to scrape.
url = 'https://book.douban.com/subject/1084336/comments/'

# A timeout keeps the script from hanging forever on an unresponsive server.
response = requests.get(url=url, headers=headers, timeout=10)
# Fail loudly on an HTTP error status instead of parsing an error page.
response.raise_for_status()
r = response.text

soup = BeautifulSoup(r, 'lxml')
# 'span' is the tag name; 'short' is the attribute value to match
# (a bare string in this position is matched against the CSS class).
pattern = soup.find_all('span', 'short')
for item in pattern:
    # get_text() returns the comment text even when the span contains nested
    # tags, a case in which .string would be None.
    print(item.get_text())
Beautiful Soup 爬取网页
最新推荐文章于 2023-01-06 13:42:29 发布