python爬取全网热搜

最新推荐文章于 2024-06-17 17:35:06 发布

Ctrl_CV攻城狮

最新推荐文章于 2024-06-17 17:35:06 发布

阅读量19

点赞数

分类专栏： Python 文章标签： python 开发语言

Python 专栏收录该内容

8 篇文章 1 订阅

订阅专栏

#来源python学霸

#安装依赖（在命令行中执行）
pip install mechanicalsoup

#实例
import mechanicalsoup

def fetch_data_from_website():
    """Scrape the board cards on the tophub.today front page.

    Returns:
        A list with one dict per ``div.cc-cd`` card found on the page.
        Each dict has:

        * ``'category'`` -- stripped text of the card's ``.cc-cd-is``
          element, or ``''`` when the card has no such element.
        * ``'links'`` -- a list of ``{'text': ..., 'link': ...}`` dicts,
          one for every anchor under ``.cc-cd-cb`` with a non-empty href.

        An empty list is returned when the response yields no parsed page.
    """
    url = 'https://tophub.today/'
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    browser = mechanicalsoup.StatefulBrowser()
    try:
        browser.session.headers.update(headers)
        browser.open(url)
        soup = browser.get_current_page()
    finally:
        # Release the underlying HTTP session even if the request fails.
        browser.close()

    # No parsed page is available (e.g. the response was not HTML).
    if soup is None:
        return []

    result = []
    for card in soup.select('div.cc-cd'):
        # A card without a title element must not abort the whole scrape.
        title_tag = card.select_one('.cc-cd-is')
        category = title_tag.text.strip() if title_tag is not None else ''
        links = [
            {'text': a_tag.text.strip(), 'link': a_tag.get('href')}
            for a_tag in card.select('.cc-cd-cb a')
            if a_tag.get('href')  # skip anchors with no usable target
        ]
        result.append({'category': category, 'links': links})

    return result
# Scrape the trending boards and print the collected result.
hot_search_data = fetch_data_from_website()
print(hot_search_data)

输出效果：