"""Scrape book search results from www.tup.tsinghua.edu.cn into book.txt."""

from bs4 import BeautifulSoup
import requests

# Accumulates every scraped book; each book is a dict with the keys
# 'title', 'author', 'isbn' and 'price'.
book_list = []


def getInfo(url, timeout=10):
    """Download one listing page and append its books to ``book_list``.

    Each book found on the page is also printed to stdout.

    Args:
        url: Address of the listing page to fetch.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script indefinitely.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server responds with an HTTP error status.
    """
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36"}
    res = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx rather than parsing an error page as if it
    # were an empty listing.
    res.raise_for_status()

    # Extract the fields of interest and package each book as a dict.
    soup = BeautifulSoup(res.text, 'html.parser')
    for li in soup.select('#csproduct > li'):
        spans = li.select('a > span')
        p = li.select('a > p')
        # Skip list items that lack the title span or the three <p> fields
        # instead of aborting the whole scrape with an IndexError.
        if not spans or len(p) < 3:
            continue
        title = spans[0].text
        author = p[0].text
        isbn = p[1].text
        price = p[2].text
        print(title, author, isbn, price)
        book_list.append({'title': title, 'author': author, "isbn": isbn, 'price': price})


def getBooks(page_num):
    """Scrape listing pages 1 through ``page_num`` (inclusive).

    Builds each page URL by appending the page number to the base search
    URL and delegates the download and parsing to ``getInfo``.

    Args:
        page_num: Number of the last page to fetch.
    """
    url1 = 'http://www.tup.tsinghua.edu.cn/bookscenter/booklist.html?keyword=%e5%a4%a7%e6%95%b0%e6%8d%ae&keytm=8d333327918b976e8c&page='
    for i in range(1, page_num + 1):
        getInfo(url1 + str(i))


def saveData():
    """Write every book in ``book_list`` to ``book.txt`` (UTF-8).

    Each entry is numbered from 1 and laid out as the title, author, ISBN
    and price on separate lines, followed by a blank line.
    """
    entries = [
        "{}.{}\n{}\n{}\n{}\n\n".format(
            index, book['title'], book['author'], book['isbn'], book['price']
        )
        for index, book in enumerate(book_list, start=1)
    ]
    with open('book.txt', 'w', encoding='utf-8') as fp:
        fp.write("".join(entries))


# Script driver: scrape the search-results page at `url`, then write the
# collected books to book.txt.
url = "http://www.tup.tsinghua.edu.cn/booksCenter/booklist.html?keyword=%E5%A4%A7%E6%95%B0%E6%8D%AE&keytm=8D333327918B976E8C"
getInfo(url)
saveData()
日常代码分享
最新推荐文章于 2024-03-04 07:56:36 发布