网站地址
知乎热榜 - 知乎
![](https://i-blog.csdnimg.cn/direct/010f406667014526977358a631b7ba3a.png)
爬虫代码
import requests
import time
from bs4 import BeautifulSoup
import json
def get_zhihu_hot():
    """Interactively browse the Zhihu hot list (https://www.zhihu.com/billboard).

    Loops forever: fetch the billboard page, print the numbered titles
    (reversed, so #1 prints last and stays visible), then act on input:
      * q/Q          -- quit the loop
      * r/R          -- refresh immediately
      * 1..N         -- print that entry's URL, auto-refresh after 10 s
      * anything else -- warn, auto-refresh after 3 s

    Side effects only (network requests, console I/O); returns None.
    """
    # Browser-like UA: Zhihu tends to reject the default python-requests
    # User-Agent.  NOTE(review): confirm against the live site.
    headers = {
        "User-Agent": ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                       "AppleWebKit/537.36 (KHTML, like Gecko) "
                       "Chrome/120.0 Safari/537.36"),
    }
    url = "https://www.zhihu.com/billboard"  # loop-invariant, hoisted

    while True:
        # Network can fail at any time; retry after a short pause instead
        # of crashing out of the interactive loop.
        try:
            resp = requests.get(url, headers=headers, timeout=10)
            resp.raise_for_status()
        except requests.RequestException as exc:
            print(f"Request failed: {exc}")
            print('\033[5;31m' + "3s后自动刷新热榜" + '\033[0m')
            time.sleep(3)
            continue

        resp.encoding = 'utf-8'
        soup = BeautifulSoup(resp.text, 'html.parser')

        # Titles come from the rendered list items...
        titles = [node.text for node in soup.find_all(class_='HotList-itemTitle')]

        # ...while the target URLs live in the embedded JSON state blob.
        script = soup.find('script', {'id': 'js-initialData'})
        if script is None:
            # Page layout changed (or we got an anti-bot page); retry.
            print("js-initialData not found; page layout may have changed.")
            print('\033[5;31m' + "3s后自动刷新热榜" + '\033[0m')
            time.sleep(3)
            continue
        hot_list = json.loads(script.get_text())['initialState']['topstory']['hotList']
        urls = [item['target']['link']['url'] for item in hot_list]

        # zip() tolerates a length mismatch between the two sources
        # (the original indexed build raised IndexError in that case).
        news_ls = [{'title': t, 'url': u} for t, u in zip(titles, urls)]
        news_ls.reverse()  # print #1 last so it stays near the prompt

        for idx, item in enumerate(news_ls, start=1):
            print(('\033[1;37m' + str(idx) + '\033[0m').center(50, "*"))
            print('\033[1;36m' + item['title'] + '\033[0m')

        user_input = input("输入新闻编号获取进一步访问的超链接,输入q/Q退出,输入r/R刷新热榜:")
        if user_input in ('q', 'Q'):
            break
        if user_input in ('r', 'R'):
            continue
        # int() replaces the original eval() -- never eval raw user input.
        if user_input.isdigit() and 1 <= int(user_input) <= len(news_ls):
            print(news_ls[int(user_input) - 1]['url'])
            print("\033[1;33m" + "按住Ctrl键,点击超链接进行访问" + "\033[0m")
            print('\033[5;31m' + '10s后自动刷新热榜' + '\033[0m')
            time.sleep(10)
        else:
            print("Invalid User Input.")
            print('\033[5;31m' + "3s后自动刷新热榜" + '\033[0m')
            time.sleep(3)
    print("Over,退出知乎热搜!")
# Script entry point: launch the interactive hot-list browser.
if __name__ == '__main__':
    get_zhihu_hot()
爬虫结果
![](https://i-blog.csdnimg.cn/direct/ab2175f1e2574ceabd87fe72ccb6eccd.png)