"""Fetch a web page and print the link titles from its ordered list.

Downloads the target page, selects the <a> elements inside the
markdown body's <ol>, prints each link's text with a 1-based index,
and reports the time spent in each phase (fetch, parse, print).
"""
import time

import requests
from bs4 import BeautifulSoup

URL = (
    "https://www.shiyanlou.com/questions/102676/"
    "?utm_source=baidu&utm_medium=cpc&utm_campaign=python"
    "&utm_term=keywords&renqun_youhua=417274"
)
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.80 Safari/537.36'
}
# CSS path to the article's ordered-list links.
LINK_SELECTOR = "#__layout > div > div.body > div.container > div.row > div.col-md-9 > div > div > div.markdown-body > ol > li > a"


def main():
    """Scrape URL and print each link title plus per-phase timings."""
    t0 = time.perf_counter()
    # timeout: don't hang forever on a dead connection;
    # raise_for_status: fail loudly instead of parsing an error page.
    response = requests.get(URL, headers=HEADERS, timeout=10)
    response.raise_for_status()
    t1 = time.perf_counter()

    soup = BeautifulSoup(response.content, "lxml")
    links = soup.select(LINK_SELECTOR)
    t2 = time.perf_counter()

    # 1-based numbering, same output format as before ("1 title").
    for i, link in enumerate(links, start=1):
        print(i, link.text)
    t3 = time.perf_counter()

    # Time taken by each step.
    print("read:", t1 - t0)
    print("soup:", t2 - t1)
    print("print:", t3 - t2)


if __name__ == "__main__":
    main()
Python爬取网页中的链接标题
最新推荐文章于 2024-06-20 05:30:14 发布