基于JSON的web信息提取
import requests
from bs4 import BeautifulSoup
# Fetch the first page of Dangdang's five-star book ranking and print one
# summary line per book (title, rating count, author, publication date,
# publisher, price).
url = "http://bang.dangdang.com/books/fivestars/1-1"
header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36'}

# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() stops early instead of parsing an error page.
res = requests.get(url, headers=header, timeout=10)
res.raise_for_status()
# Let requests guess the charset from the body rather than trusting headers.
res.encoding = res.apparent_encoding

soup = BeautifulSoup(res.text, "lxml")
book_list = soup.select("div.bang_list_box > ul > li")
print(len(book_list))


def _first_text(node, selector):
    """Return the text of the first match of *selector* under *node*, or ""."""
    matches = node.select(selector)
    return matches[0].text if matches else ""


for book in book_list:
    name = _first_text(book, "div.name > a")
    cishu = _first_text(book, "div.biaosheng > span")
    zuozhe = _first_text(book, "div.publisher_info > a")

    date_spans = book.select("div.publisher_info > span")
    chubanriqi = date_spans[0].text if date_spans else ""

    # The original read the publisher with the very same selector and index as
    # the author, so both fields always printed the same value.
    # NOTE(review): this assumes the publisher link is a direct child of the
    # same "publisher_info" div that holds the date span -- confirm against
    # the live page markup.
    chubanshe = ""
    if date_spans:
        publisher_link = date_spans[0].parent.find("a", recursive=False)
        if publisher_link is not None:
            chubanshe = publisher_link.text

    jiage = _first_text(book, "div.price > p > span ")
    print("书名:",name,"评分次数:",cishu,"作者名:",zuozhe,"出版日期:",chubanriqi,"出版社:",chubanshe,"价格:",jiage)
![](https://i-blog.csdnimg.cn/blog_migrate/7dd6fd0168da34e36fbb7d507783a32f.png)