1.安装
pip install BeautifulSoup4
2.find和find_all
find_all()
find()
.get_text()
>tag.find('span').get_text()
.attrs['属性']
.get('class')
>tag.find('span').attrs['class']
>tag.find('span').get('class')
1.按标签查找
>soup.find('a')
2.按类查找
>soup.find(class_= 'item')
>soup.find(attrs={'class': 'item'})
3.按id查找
>soup.find(id = 'item')
>soup.find(attrs = {'id':'item'})
4.正则匹配
>soup.find_all(name=re.compile('b'))
>soup.find_all(class_=re.compile('b'))
>>>例子
soup = BeautifulSoup(html_str, 'lxml')
span_list = soup.find('div', class_='category-class category-class1').find_all('span', class_='category-item')
print(type(span_list))
for tag in span_list:
    print(type(tag))
    print(tag.find('span').get_text())
3.css选择器
soup.select("title")
soup.select("html head title")
soup.select('#link1')
soup.select("p > #link1")
soup.select("a[href='https://www.baidu.com']")