实现思路
1.明确目标url:https://bangumi.tv/book/browser?sort=rank&page=1
get请求
2.对目标url发起请求,获取响应
3.实践可知是html的响应,用bs4对数据做解析
注:
1.用CTRL+F查询检索运行结果
2.如查不到可能是因为乱码了
meta标签→charset
eg:charset='utf-8'
response.encoding='utf-8'
3.快速导包
鼠标放在红线上按Alt和回车
import requests

# Target: Bangumi book ranking listing, page 1 (GET request).
url1 = 'https://bangumi.tv/book/browser?sort=rank&page=1'
headers1 = {
    # Browser-like User-Agent so the site serves the normal HTML page.
    'user-agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36'
}
# timeout prevents the script from hanging forever on a stalled connection;
# raise_for_status surfaces HTTP errors (4xx/5xx) instead of parsing an error page.
response = requests.get(url1, headers=headers1, timeout=10)
response.raise_for_status()
# Force UTF-8 to match the page's <meta charset> and avoid mojibake in the output.
response.encoding = 'utf-8'
print('======================================================================================================')
from bs4 import BeautifulSoup


def _clean(text):
    """Remove newlines and ALL spaces, matching the original replace-chain."""
    return text.replace('\n', '').replace(' ', '')


soup = BeautifulSoup(response.text, 'html.parser')
# Each <li> under ul#browserItemList is one book entry
# (same result as Ctrl+F filtering in the browser's Elements panel).
li_list = soup.select('ul#browserItemList>li')
for data in li_list:
    # select_one picks the first match — equivalent to select(...)[0] on success.
    # Title and rank keep their internal spaces (only newlines stripped),
    # exactly as the original per-field replace calls did.
    title1 = data.select_one('h3').text.replace('\n', '')
    info1 = _clean(data.select_one('p.info.tip').text)          # author/publisher line
    rateInfo1 = _clean(data.select_one('p.rateInfo>small').text)  # score
    # Vote count lives in the SECOND <span> under p.rateInfo, so keep indexed select.
    people1 = _clean(data.select('p.rateInfo>span')[1].text)
    paiming1 = data.select_one('span.rank').text.replace('\n', '')  # rank label
    print(info1, title1, rateInfo1, people1, paiming1)