From 202012190815 +0:00
requirements.txt
beautifulsoup4==4.9.3
certifi==2020.12.5
chardet==4.0.0
idna==2.10
PyMySQL==0.10.1
requests==2.25.1
soupsieve==2.1
urllib3==1.26.2
code
# Weibo hot-search board: refreshed once per minute
# Weibo top-news board: refreshed once per minute
# https://s.weibo.com/top/summary/summary?cate=realtimehot
# https://s.weibo.com/top/summary/summary?cate=socialevent
import requests
from bs4 import BeautifulSoup
def get_real_time_hot():
    """Scrape the Weibo real-time hot-search board.

    Returns:
        A ``(ok, items)`` tuple. ``ok`` is False (and ``items`` is empty) when
        the request fails, the response status is not 200, or the ranking
        table cannot be found in the page. Otherwise ``items`` holds one dict
        per ranked row with the keys ``rank`` (int), ``title`` (str),
        ``hot`` (int), ``tag`` (str, ``""`` when the row has no ``<i>``
        element) and ``link`` (URL prefixed with ``https://s.weibo.com``).
    """
    url = "https://s.weibo.com/top/summary/summary?cate=realtimehot"
    try:
        # Without a timeout a stalled connection would block the caller
        # indefinitely.
        r = requests.get(url=url, timeout=10)
    except requests.RequestException:
        return False, []
    if r.status_code != 200:
        return False, []

    soup = BeautifulSoup(r.text, 'html.parser')
    container = soup.find("div", "data")
    tbody = container.find("tbody") if container is not None else None
    if tbody is None:
        # The expected table is absent (e.g. the markup changed or a
        # different page was served); report failure instead of crashing.
        return False, []

    result = []
    for tr in tbody.find_all("tr"):
        rank_top = tr.find("td", "ranktop")
        anchor = tr.find("a")
        hot_span = tr.find("span")
        # Rows without a rank cell are not ranked entries; rows missing the
        # link or the heat counter cannot be turned into a complete record.
        if rank_top is None or anchor is None or hot_span is None:
            continue
        link = anchor.get("href")
        if link is None:
            continue
        try:
            rank_top_idx = int(rank_top.get_text())
            hot_num = int(hot_span.get_text())
        except ValueError:
            # One row with non-numeric rank/heat text must not abort the
            # whole scrape; skip just that row.
            continue
        tag_node = tr.find("i")
        tag = tag_node.get_text() if tag_node is not None else ""
        result.append({
            "rank": rank_top_idx,
            "title": anchor.get_text(),
            "hot": hot_num,
            "tag": tag,
            "link": "https://s.weibo.com" + link,
        })
    return True, result
if __name__ == "__main__":
    # Manual run: fetch the board once and print every parsed entry on its
    # own line. The success flag is not inspected here.
    _, entries = get_real_time_hot()
    for entry in entries:
        print(entry)
使用阿里云函数计算服务每分钟定时执行
# -*- coding: utf-8 -*-
import json
import logging
import os
import sys
from datetime import datetime
import pymysql
from collect import get_real_time_hot
# Root logger (getLogger() is called without a name).
logger = logging.getLogger()
# Module-level MySQL connection; assigned by connect_mysql() and used by handler().
conn = None
def connect_mysql():
    """Open a MySQL connection and store it in the module-level ``conn``.

    Connection settings are read from the environment variables
    ``MYSQL_HOST``, ``MYSQL_PORT``, ``MYSQL_USER``, ``MYSQL_PASSWD`` and
    ``MYSQL_DB``. ``MYSQL_PORT`` falls back to 3306 when unset or empty.

    Raises:
        SystemExit: with exit status 1 when the connection cannot be
            established; the underlying error is logged with its traceback.
    """
    global conn
    try:
        conn = pymysql.connect(
            host=os.environ.get("MYSQL_HOST"),
            # `or 3306` covers both a missing and an empty MYSQL_PORT.
            port=int(os.environ.get("MYSQL_PORT") or 3306),
            user=os.environ.get("MYSQL_USER"),
            # `password`/`database` are the primary keyword names; `passwd`
            # and `db` are legacy aliases.
            password=os.environ.get("MYSQL_PASSWD"),
            database=os.environ.get("MYSQL_DB"),
            connect_timeout=5,
        )
    except Exception:
        logger.exception("ERROR: Unexpected error: Could not connect to MySql instance.")
        # Exit with a non-zero status so the failure is not reported as success.
        sys.exit(1)
def initializer(context):
    """Open the shared MySQL connection by calling ``connect_mysql``.

    NOTE(review): presumably registered as the Function Compute initializer
    hook so the connection exists before the first ``handler`` call; confirm
    against the deployment configuration. ``context`` is accepted but unused.
    """
    connect_mysql()
def handler(event, context):
    """Fetch the current hot-search board and store it as one series in MySQL.

    Every row of a run shares a ``series_id`` built from the current local
    time formatted as ``%Y%m%d%H%M``. ``event`` and ``context`` are accepted
    but unused.

    Returns:
        The scraped entries serialized as a JSON string (non-ASCII characters
        kept as-is). The string is returned even when the insert fails; an
        empty list is returned when the scrape itself fails.
    """
    try:
        conn.ping()
    except Exception:
        # Covers both a dropped connection and ``conn`` still being None.
        # ``Exception`` (not a bare except) lets SystemExit and
        # KeyboardInterrupt propagate.
        connect_mysql()

    series_id = datetime.now().strftime("%Y%m%d%H%M")
    ok, data = get_real_time_hot()
    if not ok:
        logger.error("Could not fetch the real-time hot list; nothing stored.")
        return json.dumps(data, ensure_ascii=False)

    sql = (
        "INSERT INTO realtimehot(`series_id`, `rank`, `title`, `hot`, `tag`, `link`) "
        "VALUES (%s, %s, %s, %s, %s, %s)"
    )
    rows = [
        (series_id, item["rank"], item["title"], item["hot"], item["tag"], item["link"])
        for item in data
    ]
    # The context manager closes the cursor on both the success and error path.
    with conn.cursor() as cursor:
        try:
            cursor.executemany(sql, rows)
            conn.commit()
        except Exception:
            logger.exception("Failed to insert series %s", series_id)
            conn.rollback()
    return json.dumps(data, ensure_ascii=False)