# Beautiful Soup demo: looking up elements with CSS selectors via select().
import re

from bs4 import BeautifulSoup

# Sample document that the selector examples below are written against.
html = """
<html><head><title>The Dormouse's story</title></head><body>
<p class="title cls1 cls2" name="dromouse"><b>The Dormouse's story</b></p>
<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister test" id="link1"><!-- Elsie --></a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.
</p>
<p class="story">...</p>"""

# Parse the sample markup using the lxml parser backend.
soup = BeautifulSoup(html, 'lxml')

# select() supports lookup by CSS selector.
# 1. Tag selector
# result = soup.select('p')
# 2. Class selector (a single class)
# result = soup.select('.sister')
# Multiple class selectors: note that there is NO space between them
# result = soup.select('.sister.test')