# Demo: parse an HTML fragment, pretty-print the repaired tree, and read the
# text of its <title> element.
from bs4 import BeautifulSoup

# NOTE(review): the markup below was reconstructed from the printed results;
# the tags had been stripped from this literal, which left soup.title as None.
# Original tag attributes (href/class/id) could not be recovered - confirm
# against the original article. </body> and </html> are left out so that
# prettify() has something to complete.
html = '''
<html><head><title>The Domouse's story</title></head>
<body>
<p name="dromouse">The Dormouse's story</p>
<p>Once upon a time there were little sisters;and their names were
<a>Lacle</a>and
<a>Tillie</a>
and they lived at bottom of a well.</p>
<p>...</p>
'''

soup = BeautifulSoup(html, 'lxml')

# Pretty-print the parsed document; the parser fills in the missing closing
# tags automatically.
print(soup.prettify())

# Text content of the <title> element (the document title).
print(soup.title.string)
结果:
The Domouse's story
The Dormouse's story
Once upon a time there were little sisters;and their names were
Lacle
and
Tillie
and they lived at bottom of a well.
...
The Domouse's story
选择元素
# Demo: select elements by accessing the tag name as an attribute of the soup.
from bs4 import BeautifulSoup

# NOTE(review): the markup below was reconstructed from the printed results;
# the tags had been stripped from this literal, which left soup.title and
# soup.head as None. Original tag attributes (href/class/id) could not be
# recovered - confirm against the original article.
html = '''
<html><head><title>The Domouse's story</title></head>
<body>
<p name="dromouse">The Dormouse's story</p>
<p>Once upon a time there were little sisters;and their names were
<a>Lacle</a>and
<a>Tillie</a>
and they lived at bottom of a well.</p>
<p>...</p>
'''

soup = BeautifulSoup(html, 'lxml')

print(soup.title)
# <title>The Domouse's story</title>

print(type(soup.title))
# <class 'bs4.element.Tag'>

print(soup.head)
# <head><title>The Domouse's story</title></head>

# When several tags match, only the first one is returned.
print(soup.p)
# <p name="dromouse">The Dormouse's story</p>
获取标签名称:
# Demo: read the name of a tag through its .name attribute.
from bs4 import BeautifulSoup

# NOTE(review): the markup below was reconstructed from the printed results;
# the tags had been stripped from this literal, which left soup.title as None.
# Original tag attributes (href/class/id) could not be recovered - confirm
# against the original article.
html = '''
<html><head><title>The Domouse's story</title></head>
<body>
<p name="dromouse">The Dormouse's story</p>
<p>Once upon a time there were little sisters;and their names were
<a>Lacle</a>and
<a>Tillie</a>
and they lived at bottom of a well.</p>
<p>...</p>
'''

soup = BeautifulSoup(html, 'lxml')

print(soup.title.name)
# title
获取属性:
# Demo: read a tag attribute, either through the .attrs dictionary or by
# indexing the tag directly.
from bs4 import BeautifulSoup

# NOTE(review): the markup below was reconstructed from the printed results;
# the tags had been stripped from this literal, so the first <p> had no
# "name" attribute and both lookups raised KeyError. Other original
# attributes (href/class/id) could not be recovered - confirm against the
# original article.
html = '''
<html><head><title>The Domouse's story</title></head>
<body>
<p name="dromouse">The Dormouse's story</p>
<p>Once upon a time there were little sisters;and their names were
<a>Lacle</a>and
<a>Tillie</a>
and they lived at bottom of a well.</p>
<p>...</p>
'''

soup = BeautifulSoup(html, 'lxml')

# Look the attribute up in the tag's attribute dictionary.
print(soup.p.attrs['name'])
# dromouse

# Shorthand: index the tag itself with the attribute name.
print(soup.p['name'])
# dromouse
获取标签内容:
# Demo: read the text content of a tag through its .string attribute.
from bs4 import BeautifulSoup

# NOTE(review): the markup below was reconstructed from the printed results;
# the tags had been stripped from this literal, so soup.p.string returned the
# whole text instead of the first paragraph only. Original tag attributes
# (href/class/id) could not be recovered - confirm against the original
# article.
html = '''
<html><head><title>The Domouse's story</title></head>
<body>
<p name="dromouse">The Dormouse's story</p>
<p>Once upon a time there were little sisters;and their names were
<a>Lacle</a>and
<a>Tillie</a>
and they lived at bottom of a well.</p>
<p>...</p>
'''

soup = BeautifulSoup(html, 'lxml')

# soup.p is the first <p>; .string is its single text child.
print(soup.p.string)
# The Dormouse's story
根据name查找
# Demo: find every element with a given tag name using find_all().
from bs4 import BeautifulSoup

# NOTE(review): the markup below was reconstructed from the stripped text
# ("- " bullets became <li> items); without any <ul> element,
# find_all('ul')[0] raised IndexError. The split into two lists (3 + 2 items)
# and the heading tag are assumptions - confirm against the original article.
html = '''
<h4>Hello</h4>
<ul>
<li>Foo</li>
<li>Bar</li>
<li>Jay</li>
</ul>
<ul>
<li>Foo</li>
<li>Bar</li>
</ul>
'''

soup = BeautifulSoup(html, 'lxml')

# find_all() returns a list-like collection of every matching tag.
print(soup.find_all('ul'))

# Each item in that collection is a bs4.element.Tag.
print(type(soup.find_all('ul')[0]))
结果:
[
- Foo
- Bar
- Jay
- Foo
- Bar