导入模块
from bs4 import BeautifulSoup
import bs4
创建soup对象(读取file文件创建)
# Path of the HTML document to parse.
filepath = 'test.html'
# Open the file in a context manager so the handle is closed as soon as
# parsing finishes; BeautifulSoup reads the whole file inside the constructor,
# so `soup` stays fully usable after the `with` block exits.
with open(filepath, encoding='utf-8') as html_file:
    soup = BeautifulSoup(html_file, 'lxml')
使用
根据标签查找(type:bs4_obj)
# Attribute-style access returns the first <p> tag in the document as a bs4 Tag
# object (None when the document contains no <p>).
tag_p = soup.p
获取属性
# Name of the tag as a string.
name = tag_p.name
# Three ways to read the `title` attribute of the tag:
# 1) .attrs is the tag's attribute dictionary; dict.get() gives None if the key is absent.
title = tag_p.attrs.get('title')
# 2) Tag.get() performs the same lookup directly on the tag, also None if absent.
title = tag_p.get('title')
# 3) Subscript access; raises KeyError if the tag has no `title` attribute.
title = tag_p['title']
获取文本内容
# .string: the tag's text when it resolves to a single string;
# None when the tag has more than one child.
string = tag_p.string
# get_text(): all text inside the tag, including text of nested tags, joined into one string.
text = tag_p.get_text()
# .contents: list of the tag's direct children (both tags and strings).
content = tag_p.contents
# When the tag's only child is an HTML comment, `.string` is a
# bs4.element.Comment rather than ordinary text. isinstance() is the idiomatic
# type test (it also accepts subclasses), and the branch bodies are indented so
# the snippet is valid Python.
if isinstance(tag_p.string, bs4.element.Comment):
    print('这是注释内容')
else:
    print('这不是注释')
获取子孙节点(type:generator)
# .descendants lazily iterates over every node nested under the first <p>
# (children, grandchildren, ...); consume it with a for-loop or list().
descendants = soup.p.descendants
find 和 find_all 查找
# find() returns the first matching tag, or None when nothing matches.
# The results below are not stored; these lines only illustrate the call forms.
soup.find('a')
# Keyword arguments filter on attribute values: first <a> whose title is 'hhh'.
soup.find('a',title='hhh')
# Filter by id. NOTE(review): '' looks like a placeholder for a real id value.
soup.find('a',id='')
# Filter by CSS class; spelled `class_` because `class` is a reserved word in Python.
# NOTE(review): '' looks like a placeholder for a real class name.
soup.find('a',class_='')
# find_all() returns a list-like collection of every matching tag (empty when none match).
soup.find_all('a')
# A list of names matches tags with any of those names (<a> or <p>).
soup.find_all(['a','p'])
# limit caps the number of results returned (at most 2 here).
soup.find_all('a',limit=2)
select选择(type:list)
# select() takes a CSS selector and returns a list of matching tags; `>` matches
# direct children only. Indexing [0] raises IndexError when nothing matches.
soup.select('.main > ul > li > a')[0].string