Python Web Scraping Example: Zhihu Hot Search

Site Address

Zhihu Hot List: https://www.zhihu.com/billboard

Scraper Code

```python
import json
import time

import requests
from bs4 import BeautifulSoup


def get_zhihu_hot():
    while True:
        url = "https://www.zhihu.com/billboard"
        resp = requests.get(url)
        resp.encoding = 'utf-8'
        soup = BeautifulSoup(resp.text, 'html.parser')

        # Hot-list titles are rendered in elements with the class "HotList-itemTitle".
        title_ls = [item.text for item in soup.find_all(class_='HotList-itemTitle')]

        # The matching links live in the JSON embedded in the
        # <script id="js-initialData"> tag.
        js_text_dict = json.loads(soup.find('script', {'id': 'js-initialData'}).get_text())
        hot_list = js_text_dict['initialState']['topstory']['hotList']
        url_ls = [item['target']['link']['url'] for item in hot_list]

        news_ls = [{'title': title_ls[i], 'url': url_ls[i]} for i in range(len(title_ls))]
        news_ls.reverse()

        for i, news in enumerate(news_ls, start=1):
            print(('\033[1;37m' + str(i) + '\033[0m').center(50, "*"))
            print('\033[1;36m' + news.get('title') + '\033[0m')

        news_length = len(news_ls)
        user_input = input("Enter a news number to get its link, q/Q to quit, r/R to refresh: ")
        if user_input in ('q', 'Q'):
            break
        elif user_input in ('r', 'R'):
            continue
        elif user_input in [str(n) for n in range(1, news_length + 1)]:
            # int() is safer than eval() for untrusted user input.
            news_index = int(user_input) - 1
            print(news_ls[news_index].get('url'))
            print('\033[1;33m' + 'Hold Ctrl and click the link to open it' + '\033[0m')
            print('\033[5;31m' + 'The hot list will refresh automatically in 10s' + '\033[0m')
            time.sleep(10)
        else:
            print("Invalid input.")
            print('\033[5;31m' + 'The hot list will refresh automatically in 3s' + '\033[0m')
            time.sleep(3)

    print("Done, exiting the Zhihu hot list!")


if __name__ == '__main__':
    get_zhihu_hot()
```
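
One caveat: Zhihu may return an error page or empty markup for plain `requests` calls that carry no browser-like headers. If that happens, a hedged variant of the request is to pass an explicit `User-Agent`; the header value below is purely illustrative and not something taken from the original script.

```python
# Minimal sketch: fetch the billboard page with an explicit User-Agent.
# Assumption: Zhihu may block requests that do not look like a browser.
import requests

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
}
resp = requests.get("https://www.zhihu.com/billboard", headers=headers, timeout=10)
resp.raise_for_status()  # fail fast if the request was rejected
print(resp.status_code, len(resp.text))
```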

Scraper Output

Running the script prints the numbered hot-list titles in the terminal; entering a number prints the link for that entry, q/Q exits, and r/R refreshes the list.

Scrapy + Splash Alternative

Below is a simple Scrapy example that crawls Zhihu's hot topics.

First, install Scrapy and the other required libraries:

```
pip install scrapy
pip install requests
pip install scrapy-splash
```

Then create a new Scrapy project:

```
scrapy startproject zhihu
cd zhihu
```

Next, add some configuration to `settings.py`:

```python
BOT_NAME = 'zhihu'

SPIDER_MODULES = ['zhihu.spiders']
NEWSPIDER_MODULE = 'zhihu.spiders'

ROBOTSTXT_OBEY = False
DOWNLOAD_DELAY = 3

DOWNLOADER_MIDDLEWARES = {
    'scrapy_splash.SplashCookiesMiddleware': 723,
    'scrapy_splash.SplashMiddleware': 725,
    'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware': 810,
}

SPLASH_URL = 'http://localhost:8050'
DUPEFILTER_CLASS = 'scrapy_splash.SplashAwareDupeFilter'
HTTPCACHE_STORAGE = 'scrapy_splash.SplashAwareFSCacheStorage'
```

Here we use Splash to render the page, which is why the Splash-related settings are needed. `DOWNLOAD_DELAY` is the download delay; to avoid being blocked by the site, it is best to keep it fairly long.

Next, create a spider class in a file named `zhihu_spider.py`:

```python
import scrapy
from scrapy_splash import SplashRequest


class ZhihuSpider(scrapy.Spider):
    name = 'zhihu'
    allowed_domains = ['www.zhihu.com']
    start_urls = ['https://www.zhihu.com/hot']

    script = '''
    function main(splash, args)
        assert(splash:go(args.url))
        assert(splash:wait(2))
        return splash:html()
    end
    '''

    def start_requests(self):
        for url in self.start_urls:
            yield SplashRequest(url, self.parse, endpoint='execute', args={
                'lua_source': self.script
            })

    def parse(self, response):
        for item in response.css('.HotItem'):
            yield {
                'title': item.css('.HotItem-title a::text').get(),
                'link': item.css('.HotItem-title a::attr(href)').get(),
            }
```

Here we use SplashRequest to fetch the page, with a Lua script that waits for it to finish loading. CSS selectors then extract the title and link of each hot topic, which are stored in a dict and yielded.

Finally, run the spider:

```
scrapy crawl zhihu -o zhihu.csv
```

This scrapes the titles and links of Zhihu's hot topics and saves them to a CSV file.
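
The settings above assume a Splash instance is already listening at `http://localhost:8050` (commonly started from the scrapinghub/splash Docker image). Once the crawl has written `zhihu.csv`, a quick way to sanity-check the export is a small sketch like the one below; the `title` and `link` column names simply mirror the fields yielded by the spider above.

```python
# Minimal sketch for inspecting the exported items; assumes the crawl above
# produced zhihu.csv with 'title' and 'link' columns.
import csv

with open("zhihu.csv", newline="", encoding="utf-8") as f:
    for i, row in enumerate(csv.DictReader(f), start=1):
        print(f"{i}. {row['title']} -> {row['link']}")
```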