"""Traversing the document tree with BeautifulSoup.

Traversal selects nodes directly by tag name (``soup.p``, ``soup.a``, ...).
It is fast, but when several tags share the same name only the first one
is returned.

Outline of the tutorial:
    1. Usage
    2. Getting a tag's name
    3. Getting a tag's attributes
    4. Getting a tag's content
    5. Nested selection
    6. Children and descendants
    7. Parent and ancestors
    8. Siblings
"""
from bs4 import BeautifulSoup

# Sample document used by every example below.
html_doc = """
<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title" id='id_pp' name='lqz'>asdfasdf<b>asdfas</b><span>span<b>bbb</b></span></p>
<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>
<p class="story">...</p>
"""

# Parse with the lxml backend (requires the third-party ``lxml`` package).
soup = BeautifulSoup(html_doc, 'lxml')

# Traversing the document tree (fast).
# Uncomment any of the examples below to try them.

# 6. Children and descendants
# print(soup.p.contents)        # all direct children of the first <p>
# print(soup.p.children)        # an iterator over the direct children of <p>
# print(list(soup.p.children))  # the same children, materialized into a list
#
# for i, child in enumerate(soup.p.children):
#     print(i, child)
#
# print(soup.p.descendants)     # descendants: every node nested under <p>
# for i, child in enumerate(soup.p.descendants):
#     print(i, child)

# 7. Parent and ancestors
# print(soup.a.parent)          # the parent of the first <a>
# print(soup.a.parents)         # all ancestors of <a>: parent, grandparent, ...
# print(list(soup.a.parents))   # the same ancestors, materialized into a list

# 8. Siblings
# print(soup.a.next_sibling)      # the next sibling
# print(soup.a.previous_sibling)  # the previous sibling
#
# print(list(soup.a.next_siblings))      # all following siblings (generator)
# print(list(soup.a.previous_siblings))  # all preceding siblings (generator)
# 2. Searching the document tree with bs4
from bs4 import BeautifulSoup
html_doc ="""
<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title" id='id_pp' name='lqz'>asdfasdf<b>asdfas</b><span>span<b>bbb</b></span></p>
<p class="story">Once upon a time there were three little sisters;