目标网站:https://pubmed.ncbi.nlm.nih.gov/34311758/
import re
import requests
from bs4 import BeautifulSoup
导库
# Fetch the PubMed article page. An explicit timeout is passed because
# requests otherwise waits indefinitely when the server stops responding.
res = requests.get('https://pubmed.ncbi.nlm.nih.gov/34311758/', timeout=30)
# Print the HTTP status code so a failed fetch is visible before parsing.
print(res.status_code)
向目标网页发送请求
# Keep the decoded response body in `string`, then build the parse tree
# from it using Python's built-in "html.parser" backend.
string = res.text
soup = BeautifulSoup(markup=string, features='html.parser')
利用BS库对网页进行解析,得到解析对象soup
# Locate the abstract container first. find() returns None when no element
# matches (e.g. an error page or a changed layout), so guard before chaining
# find_all() instead of failing with an opaque AttributeError on None.
abstract_div = soup.find('div', class_='abstract-content selected')
# Collect every <p> inside the container; fall back to an empty list so the
# code that iterates over li_list simply has nothing to process.
li_list = abstract_div.find_all('p') if abstract_div is not None else []
定位<p>标签,得到的结果:
#预处理解析结果
li_list_Process = []
for i in li_list:
#print(type(str(i)))
i = str(i).replace('\n','')
i = re.findall(r'<\/strong&g