BeautifulSoup方法和属性的调用

# -*- coding:utf-8 -*-
from bs4 import BeautifulSoup
# Sample HTML document used as the parsing input for the examples in this script.
# Note: the markup ends after the last <p>; no closing </body> or </html> tag is present.
html = """  
<html><head><title>The Dormouse's story</title></head>  
<body>  
<p class="title" name="dromouse">a<b>The Dormouse's story</b>c</p>  
<p class="story">Once upon a time there were three little sisters; and their names were  
<a href="http://example.com/elsie" class="sister" id="link1"><!-- Elsie --></a>,  
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and  
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;  
and they lived at the bottom of a well.</p>  
<p class="story">...</p>  
"""
# Build the BeautifulSoup object using Python's built-in HTML parser ('html.parser').
soup = BeautifulSoup(html,'html.parser')
print(type(soup))   #<class 'bs4.BeautifulSoup'>
print(soup)   # prints the whole parsed document
print(dir(soup))  # list the methods and attributes available on soup
print(soup.title)   #<title>The Dormouse's story</title>
print(soup.title.name)  #title

# Get a tag's text content; for <title> all three spellings print the same text.
print(soup.title.string)  #The Dormouse's story
print(soup.title.get_text()) #The Dormouse's story
print(soup.title.text)   #The Dormouse's story
print(soup.head)   #<head><title>The Dormouse's story</title></head>

# Navigate upwards through the tree: .parent gives the enclosing tag.
print(soup.title.parent) #<head><title>The Dormouse's story</title></head>

# Navigate downwards through the tree: child / children.
# soup.p is the first <p>; its text nodes "a" and "c" are part of the output.
print(soup.p)   #<p class="title" name="dromouse">a<b>The Dormouse's story</b>c</p>
p_children = soup.p.children
# Per the bs4 docs .children is an iterator, so this prints the iterator
# object's repr rather than the child nodes themselves.
print(p_children)
# Iterate to see the direct children: text nodes and tags, in document order.
for i,each in enumerate(p_children):
    print(i,each)
# Expected output of the loop:
# 0 a
# 1 <b>The Dormouse's story</b>
# 2 c

# soup.a is the first <a> tag in the document; inspect its name and attributes.
a = soup.a
print(a)   #<a class="sister" href="http://example.com/elsie" id="link1"><!-- Elsie --></a>
print(a.name)  #a
print(a.attrs) #{'href': 'http://example.com/elsie', 'class': ['sister'], 'id': 'link1'}
# Dotted access (a.id) searches for a child tag named <id>, not the HTML
# attribute, hence None; use a['id'] or a.get('id') to read attributes.
print(a.id)   #None
print(a['id'])  #link1
print(a.get('id'))  #link1
# 'class' comes back as a list, as the expected output below shows.
print(a['class'])   #['sister']
print(a.get('class'))   #['sister']
print(a['href'])    #http://example.com/elsie
print(a.get('href'))   #http://example.com/elsie

# find() returns a single tag (the first match); find_all() returns every match.
print(soup.find('p'))
print(soup.find_all('p'))   # returns a list-like collection of tags
# A dict as the second argument filters on attribute values.
print(soup.find_all('a',{'id':'link3'}))   #[<a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>]
a_link3 = soup.find('a',{'id':'link3'})
print(a_link3['href'])   #http://example.com/tillie
print(soup.find_all('a',{'class':'sister'}))  # returns all <a> tags whose class is "sister"
  • 0
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值