# -*- coding:utf-8 -*-
from bs4 import BeautifulSoup
# Sample HTML document parsed by the BeautifulSoup examples in this script.
# Note that the markup ends without closing </body> or </html> tags.
html = """
<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title" name="dromouse">a<b>The Dormouse's story</b>c</p>
<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1"><!-- Elsie --></a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>
<p class="story">...</p>
"""
# Walk-through of common BeautifulSoup operations on the sample `html`
# document. Each print is annotated with the output expected from the
# built-in 'html.parser' backend.

# Build the parse tree using Python's built-in HTML parser.
soup = BeautifulSoup(html, 'html.parser')
print(type(soup))  # <class 'bs4.BeautifulSoup'>
print(soup)
print(dir(soup))  # list the methods and attributes available on soup
print(soup.title)  # <title>The Dormouse's story</title>
print(soup.title.name)  # title

# Three ways to read the title's text; all print the same value here.
print(soup.title.string)  # The Dormouse's story
print(soup.title.get_text())  # The Dormouse's story
print(soup.title.text)  # The Dormouse's story
print(soup.head)  # <head><title>The Dormouse's story</title></head>

# Navigate upwards: .parent returns the enclosing tag.
print(soup.title.parent)  # <head><title>The Dormouse's story</title></head>

# Navigate downwards: .children iterates over a tag's direct children.
# soup.p is the first <p> in the document.
print(soup.p)  # <p class="title" name="dromouse">a<b>The Dormouse's story</b>c</p>
p_children = soup.p.children
print(p_children)  # an iterator object, not the children themselves
for i, each in enumerate(p_children):
    print(i, each)
# 0 a
# 1 <b>The Dormouse's story</b>
# 2 c

# soup.a is the first <a> tag in the document.
a = soup.a
print(a)  # <a href="http://example.com/elsie" class="sister" id="link1"><!-- Elsie --></a>
print(a.name)  # a
print(a.attrs)  # {'href': 'http://example.com/elsie', 'class': ['sister'], 'id': 'link1'}

# Dotted access looks for a child tag named "id", not the HTML attribute,
# so it yields None here. Use subscripting or .get() to read attributes.
print(a.id)  # None
print(a['id'])  # link1
print(a.get('id'))  # link1
print(a['class'])  # ['sister']
print(a.get('class'))  # ['sister']
print(a['href'])  # http://example.com/elsie
print(a.get('href'))  # http://example.com/elsie

# Searching: find() returns the first match, find_all() returns every match.
print(soup.find('p'))
print(soup.find_all('p'))  # list-like collection of all <p> tags
print(soup.find_all('a', {'id': 'link3'}))  # [<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>]
a_link3 = soup.find('a', {'id': 'link3'})
print(a_link3['href'])  # http://example.com/tillie
print(soup.find_all('a', {'class': 'sister'}))  # every <a> whose class is "sister"
# Calling BeautifulSoup methods and attributes
# (Source page note: latest recommended article published 2024-04-27 22:46:17)