# Sample document for the examples below.
from bs4 import BeautifulSoup

html = """
<html><head><title>The Dormouse's story</title></head >
<body>
<p class="title" name="dromouse"><b>The Dormouse's story</b>
Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="linkl"><span>Elsie</span></a>and they lived at the bottom of a well.</p>
<p class="story">...</p>
"""

# Parse the sample markup, asking BeautifulSoup to use the 'lxml' parser.
soup = BeautifulSoup(html, 'lxml')
# .contents: the direct children of the first <p> tag, returned as a list.
print(soup.p.contents)

# .children: the same direct children, but exposed lazily as an iterator,
# so printing it shows the iterator object rather than the nodes themselves.
print(soup.p.children)
for i, child in enumerate(soup.p.children):
    print(i, child)

# .descendants: walks every nested node under the first <p> (children,
# grandchildren, ...); it is also lazy, so it must be iterated to see the nodes.
print(soup.p.descendants)
for i, child in enumerate(soup.p.descendants):
    print(i, child)
# find_all(name='ul') returns a list of every <ul> tag in the document.
ul_tags = soup.find_all(name='ul')
print(ul_tags)

# Each match is a bs4.element.Tag. Guard the lookup: the list is empty when
# the document has no <ul>, and indexing it would raise IndexError.
if ul_tags:
    print(type(ul_tags[0]))

# Nested search: for every <ul>, list its <li> tags and print each one's text.
for ul in ul_tags:
    li_tags = ul.find_all(name='li')
    print(li_tags)
    for li in li_tags:
        print(li.string)
import re
from bs4 import BeautifulSoup
# Second sample document: two links nested inside panel <div>s.
html = '''
<div class="panel">
<div class="panel-body">
<a>Hello, this is a link</a >
<a>Hello, this is a link, too</a>
</div >
</div>
'''
soup = BeautifulSoup(html, 'lxml')

# Match text nodes whose content contains "link". `string=` is the current
# name of this filter (bs4 >= 4.4.0); the older `text=` spelling is deprecated.
print(soup.find_all(string=re.compile('link')))