from bs4 import BeautifulSoup

# Sample document shared by the examples below: a title, one "title"
# paragraph, and a "story" paragraph holding three sibling <a> links
# (id="link1" .. "link3") separated by text nodes.
html_doc = """
<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>
<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>
<p class="story">...</p>
"""

# Parse the sample with the 'html.parser' backend; `soup` is the parsed
# tree that every later snippet navigates.
soup = BeautifulSoup(html_doc, 'html.parser')
'\n'
"The Dormouse's story"
'\n'
'\n'
"The Dormouse's story"
'\n'
'Once upon a time there were three little sisters; and their names were\n'
'Elsie'
',\n'
'Lacie'
' and\n'
'Tillie'
';\nand they lived at the bottom of a well.'
'\n'
'...'
'\n'
使用 .stripped_strings 可以去除多余空白内容:
# Use .stripped_strings to skip whitespace-only strings and trim the
# leading/trailing whitespace of the rest.
for string in soup.stripped_strings:
    print(repr(string))
输出为:
"The Dormouse's story"
"The Dormouse's story"
'Once upon a time there were three little sisters; and their names were'
'Elsie'
','
'Lacie'
'and'
'Tillie'
';\nand they lived at the bottom of a well.'
'...'
# Print every node that follows the first <a> tag under the same parent
# (both the text nodes and the remaining <a> tags).
for sibling in soup.a.next_siblings:
    print(repr(sibling))

# Print every node that precedes the id="link3" tag under the same
# parent, starting with the nearest one and walking backwards.
for sibling in soup.find(id="link3").previous_siblings:
    print(repr(sibling))
输出为:
',\n'
<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>
' and\n'
<a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>
';\nand they lived at the bottom of a well.'

' and\n'
<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>
',\n'
<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>
'Once upon a time there were three little sisters; and their names were\n'