# Scrape the Weibo hot-search (trending topics) list.
import schedule
import pandas as pd
from datetime import datetime
import requests
from bs4 import BeautifulSoup
# Weibo real-time hot-search summary page; main() downloads it with requests.get().
url = "https://s.weibo.com/top/summary?cate=realtimehot&sudaref=s.weibo.com&display=0&retcode=6102"

# Empty template record; main() takes a fresh .copy() of it for every table row
# before filling in that row's fields.
get_info_dict = {}

# Module-level counter that main() declares global; starts at zero.
count = 0
def main():
global url, get_info_dict, count
get_info_list = []
print("正在爬取数据~~~")
html = requests.get(url).text
soup = BeautifulSoup(html, 'lxml')
for tr in soup.find_all(name='tr', class_=''):
get_info = get_info_dict.copy()
get_info['title'] = tr.find(class_='td-02').find(name='a').text
try:
get_info['num'] = eval