bs4(BeautifulSoup)用于按文档结构从标记文本中提取内容,例如 HTML、XML 文件;JSON 格式的数据不属于标记文档,应使用 json 模块解析。
bs4 的安装:pip install --user beautifulsoup4(PyPI 上的正式包名是 beautifulsoup4,导入时使用 bs4)
使用:
from bs4 import BeautifulSoup
# Parse the raw markup in `data` using Python's built-in "html.parser" backend.
page = BeautifulSoup(data, "html.parser")

# Step 2: look things up in the parsed tree with
#   find(tag, attribute=value)      -> first match, or None when nothing matches
#   find_all(tag, attribute=value)  -> list of all matches
# `class` is a Python keyword, so bs4 takes the CSS-class filter as `class_`.
table = page.find("div", class_="news-nr-box")

# find() yields None when the container div is absent; fall back to an empty
# row list so the loop is simply skipped instead of raising AttributeError.
trs = (
    table.find_all("tr", align="center", bgcolor="#FFFFFF")
    if table is not None
    else []
)
for tr in trs:
    tds = tr.find_all("td")
    if len(tds) < 6:
        # Rows with fewer than six cells cannot be unpacked below; skip them
        # rather than fail with IndexError.
        continue
    # `.text` is the text content enclosed by the tag.
    # NOTE(review): column meanings are inferred from the variable names only
    # (presumably item name, high, low, current price, unit, date) -- confirm
    # against the actual page layout.
    name = tds[0].text
    high = tds[1].text
    low = tds[2].text
    xianjia = tds[3].text
    danwei = tds[4].text
    date = tds[5].text