python爬虫3：bs4的使用

最新推荐文章于 2023-04-02 19:16:13 发布

goxingman

最新推荐文章于 2023-04-02 19:16:13 发布

阅读量539

点赞数

分类专栏： python 文章标签： python 爬虫 开发语言

本文链接：https://blog.csdn.net/goxingman/article/details/121967171

版权

python 专栏收录该内容

8 篇文章 0 订阅

订阅专栏

import bs4
import re


def getData(path="./tb.html"):
    """Demonstrate common BeautifulSoup lookups on a local HTML file.

    The file is parsed with the built-in ``html.parser`` backend, a series
    of example queries is run against it, and the result of the last CSS
    selector query is printed. Most intermediate results are assigned only
    to illustrate the API and are otherwise unused.

    Args:
        path: Location of the HTML file to parse. Defaults to
            ``"./tb.html"``, the path the original script hard-coded.

    Raises:
        OSError: If the file cannot be opened or read.
        AttributeError: If the document has no ``<div>`` or ``<tr>`` tag,
            because ``soup.div`` / ``soup.tr`` is then ``None``.
    """
    # Read inside a context manager so the handle is closed even when
    # read() raises; the previous version never closed the file.
    with open(path, "rb") as file:
        html = file.read()
    soup = bs4.BeautifulSoup(html, "html.parser")

    # Every tag in the document.
    fd = soup.find_all()

    # Attribute-style access returns only the FIRST tag with that name.
    div = soup.div
    div = soup.div.string
    # Attributes of the first <tr>, as a dict.
    attrs = soup.tr.attrs
    # Direct children of the first <tr>, as a list.
    tr = soup.tr.contents

    # Look up tags by name.
    find_all = soup.find_all("tr")

    # Look up tags with a regular expression applied to the tag name.
    soup_find_all = soup.find_all(re.compile("t"))

    # Look up tags by attribute conditions.
    r = soup.find_all(colspan="2")
    # ``=True`` means "the tag has this attribute", whatever its value.
    result_set = soup.find_all(type=True)
    # ``class`` is a Python keyword, so the keyword argument is ``class_``.
    result_set = soup.find_all(class_=True)
    # Match text nodes equal to any of the listed strings.
    # NOTE(review): newer Beautiful Soup releases name this argument
    # ``string``; ``text`` is kept here to preserve existing behavior.
    all1 = soup.find_all(text=["测试", "3"])

    # CSS selectors.
    select = soup.select(".a")  # by class
    select = soup.select("table > div")  # child combinator (nesting level)
    select = soup.select("input ~ div")  # sibling combinator (same level)

    print(select)

# Run the demonstration as soon as this file is executed (or imported).
getData()