下面的代码用于获取 IMF 最新新闻列表及对应链接：
from bs4 import BeautifulSoup
import requests
import time
from urllib.parse import urljoin  # stdlib; resolves scraped hrefs against the page URL

# Download the IMF news listing page, print each item's number, date text and
# title followed by its absolute link, then report how long each phase took.
# t0..t3 bracket the download, parse and print phases.
t0 = time.perf_counter()
url = "https://www.imf.org/external/what/whatsnewenglish/what.aspx?Page=1"
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.80 Safari/537.36'
}
# A timeout keeps the script from hanging on a stalled connection, and
# raise_for_status() avoids parsing an HTTP error page as if it were data.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
html = response.content
t1 = time.perf_counter()
# Restrict to anchors that carry an href: only those can be printed as links.
news_links = BeautifulSoup(html, "lxml").select("#content h4 a[href]")
t2 = time.perf_counter()
for index, link in enumerate(news_links, start=1):
    # find_next_sibling() skips whitespace text nodes, which have no .select();
    # plain .next_sibling may return such a node.
    detail = link.parent.find_next_sibling()
    date_span = detail.select_one("span") if detail is not None else None
    date_text = date_span.text if date_span is not None else ""
    print(str(index), date_text, link.text.strip())
    # urljoin copes with root-relative, page-relative and absolute hrefs,
    # where plain string concatenation produced "//" or a mangled URL.
    print(urljoin(url, link["href"]))
t3 = time.perf_counter()
# Time spent in each step
print("read:", t1 - t0)
print("soup:", t2 - t1)
print("print:", t3 - t2)
下面的代码用于获取最新的《世界经济展望》报告列表：
from bs4 import BeautifulSoup
import requests
import time
from urllib.parse import urljoin  # stdlib; resolves scraped hrefs against the page URL

# Download the IMF World Economic Outlook publications page, print each result
# row's number, link text and first paragraph followed by its absolute link,
# then report how long each phase took. t0..t3 bracket the download, parse and
# print phases.
t0 = time.perf_counter()
url = "https://www.imf.org/en/Publications/WEO"
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.80 Safari/537.36'
}
# A timeout keeps the script from hanging on a stalled connection, and
# raise_for_status() avoids parsing an HTTP error page as if it were data.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
html = response.content
t1 = time.perf_counter()
report_rows = BeautifulSoup(html, "lxml").select(".result-row")
t2 = time.perf_counter()
for index, row in enumerate(report_rows, start=1):
    link = row.select_one("a")
    summary = row.select_one("p")
    # A row missing its anchor or paragraph prints an empty field instead of
    # aborting the whole listing with an IndexError.
    title = link.text.strip() if link is not None else ""
    summary_text = summary.text.strip() if summary is not None else ""
    print(str(index), title, summary_text)
    if link is not None and link.has_attr("href"):
        # Concatenating url + href duplicated the page path for root-relative
        # hrefs; urljoin resolves the href the way a browser would.
        print(urljoin(url, link["href"]))
t3 = time.perf_counter()
# Time spent in each step
print("read:", t1 - t0)
print("soup:", t2 - t1)
print("print:", t3 - t2)