Python热榜

「已注销」

已于 2022-06-17 11:39:18 修改

阅读量228

点赞数

文章标签： python pycharm 开发语言

于 2022-06-16 23:33:10 首次发布

本文链接：https://blog.csdn.net/spartan6/article/details/125325022

版权

需要用到的库有：requests、beautifulsoup4、pandas、openpyxl

安装库（一条命令即可）：pip install requests beautifulsoup4 pandas openpyxl

#!/usr/bin/python
# -*- coding:utf-8 -*-

import requests
from bs4 import BeautifulSoup
import pandas
import webbrowser
from tkinter import *
import tkinter as tk
from tkinter import ttk
from tkinter.filedialog import askdirectory

# Root window: fixed-size 640x480, positioned at screen offset (500, 100).
window = tk.Tk()
window.geometry("640x480+500+100")
window.title("全网热搜")
window.resizable(False, False)  # the window cannot be resized

# Widgets are created in the same order as before, since creation order
# decides stacking and keyboard-focus traversal.
lab1 = tk.Label(window, text="目标路径:")
lab2 = tk.Label(window, text="选择分类:")
path = tk.StringVar()  # holds the folder the Excel file is saved to
menu = ttk.Combobox(window, width=10)  # board selector
menu.configure(value=('知乎热榜', '微博热搜榜', '微信热文榜', 'bilibili日榜', '抖音视频榜'))
input = tk.Entry(window, width=45, textvariable=path)  # entry showing the save folder
thebox = tk.Listbox(window, height=19, width=89)  # result list


# ------Pick a local folder------
def select_path():
    """Ask the user for a folder and store it in the ``path`` variable.

    When the dialog is cancelled ``askdirectory`` returns an empty string;
    in that case the previously chosen folder is kept instead of being
    overwritten with nothing.
    """
    chosen = askdirectory()
    if chosen:
        path.set(chosen)


# -------Clear the output box from its first row to its last row-----
def cle():
    """Remove every row from the ``thebox`` list box."""
    first_row, last_row = 0, "end"
    thebox.delete(first_row, last_row)


# -------Resolve which hot list to crawl------
def get_cid():
    """Return the tophub.today URL path of the board chosen in ``menu``.

    Returns:
        str: the path (starting with ``/n/``) of the selected board.

    Raises:
        ValueError: if the combobox text is not one of the known boards,
            e.g. because nothing has been selected yet.  Previously this
            case failed with an ``UnboundLocalError`` on ``cid``.
    """
    board_paths = {
        "知乎热榜": '/n/mproPpoq6O',
        "微博热搜榜": '/n/KqndgxeLl9',
        "微信热文榜": '/n/WnBe01o371',
        "bilibili日榜": '/n/74KvxwokxM',
        "抖音视频榜": '/n/DpQvNABoNE',
    }
    selected = menu.get()
    try:
        return board_paths[selected]
    except KeyError:
        raise ValueError(f"no known hot list selected: {selected!r}") from None


# -----------Fetch the selected board, export it and list it---------
# Links of the rows currently shown in ``thebox``, in row order, so that
# ``open_url`` can resolve a selection without fetching the page again.
_shown_links = []


def download():
    """Scrape the selected tophub.today board, save it and display it.

    The board chosen in ``menu`` is fetched and every title link in the
    rows of the page's first ``<tbody>`` is collected.  The entries are
    written once to ``<folder in the entry box>/<board name>.xlsx`` and then
    replace the current content of ``thebox``, one row per link, so a
    list-box row index is always a valid index into the returned list.

    Returns:
        list: absolute link of every listed entry, in list-box row order.

    Raises:
        requests.RequestException: on network failure, timeout, or an HTTP
            error status.
    """
    base_url = 'https://tophub.today'
    url = base_url + str(get_cid())  # final URL to parse
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36'}
    # A timeout keeps the GUI from hanging forever on a stalled connection.
    resp = requests.get(url, headers=headers, timeout=10)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, 'html.parser')
    tbody = soup.find('tbody')  # first tbody of the page
    nodes = tbody.find_all('tr') if tbody is not None else []

    records = []  # one dict per entry, becomes one spreadsheet row
    lb = []  # link of each entry, parallel to ``records``
    for node in nodes:
        title_cell = node.find('td', class_='al')
        order_cell = node.find('td', align='center')
        cells = node.find_all('td')
        if title_cell is None or order_cell is None or len(cells) < 3:
            continue  # not a ranking row in the expected layout
        order = order_cell.get_text()  # rank
        heat = cells[2].get_text()  # third td holds the heat value
        for message in title_cell.find_all('a'):
            link = base_url + message['href']
            lb.append(link)
            records.append({
                '排名': order,
                '热度': heat,
                '标题': message.get_text(),
                '链接': link,
            })

    if records:
        # Write the workbook once instead of rewriting it for every entry;
        # as before, nothing is written when the page yielded no entries.
        file_name = input.get() + '/' + menu.get() + '.xlsx'
        pandas.DataFrame(records).to_excel(file_name, index=False)

    # Replace (rather than append to) the displayed rows so that row
    # indices stay aligned with ``lb`` across repeated crawls.
    thebox.delete(0, 'end')
    for record in records:
        thebox.insert('end', (record['排名'], record['热度'], record['标题']))
    _shown_links[:] = lb
    return lb


# -----------Open the selected link----------
def open_url():
    """Open the link of the selected list-box row in the web browser.

    Uses the links remembered by the last ``download`` call instead of
    crawling again, so the opened page is the one the row was built from.
    Does nothing when no row is selected.
    """
    selection = thebox.curselection()
    if not selection:
        return
    choice = selection[0]  # index of the selected row
    links = _shown_links or download()
    if choice < len(links):
        # Open the link that belongs to the selected row in the browser.
        webbrowser.open(links[choice], new=1, autoraise=True)


# Options shared by the three raised buttons.
_raised_button = {'relief': tk.RAISED, 'width': 8, 'height': 1}

# ----------Choose the save folder-------
button0 = tk.Button(window, text='选择路径', command=select_path, **_raised_button)
# ----------Run the crawl----------
button1 = tk.Button(window, text='爬取', command=download, **_raised_button)
# ----------Clear the output box--------
button2 = tk.Button(window, text='清空输出', command=cle, **_raised_button)
# -----------Open the selected link----------
button3 = tk.Button(window, text='打开', command=open_url, width=8, height=1)

# -----------Lay out the widgets: (widget, x, y) in absolute pixels---------
_layout = (
    (lab1, 10, 10),
    (lab2, 10, 60),
    (menu, 90, 60),
    (input, 90, 10),
    (thebox, 5, 110),
    (button0, 540, 5),
    (button1, 420, 55),
    (button2, 540, 55),
    (button3, 300, 55),
)
for _widget, _x, _y in _layout:
    _widget.place(x=_x, y=_y)
tk.mainloop()

工程文件（提取码：c4kj）