# 小白第一步 (beginner's first step)
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
def main():
    """Fetch the Weibo hot-search page with Selenium and print the ranked topics.

    Launches Chrome, waits up to 10 s for the topic links to render,
    captures the page HTML, then parses and prints each topic with its rank.
    The browser is always closed, even on navigation failure or timeout.
    """
    # Launch a Chrome browser via Selenium.
    driver = webdriver.Chrome()
    try:
        # Open the Weibo hot-search summary page.
        # (Inside the try so the browser is quit even if navigation fails.)
        driver.get('https://s.weibo.com/top/summary')
        # Explicit wait: block until at least one topic link is present.
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, '.td-02 a'))
        )
    finally:
        # Capture whatever HTML is present (best effort, even on timeout),
        # then always release the browser process.
        html = driver.page_source
        driver.quit()

    # Parse the captured HTML.
    soup = BeautifulSoup(html, 'html.parser')
    # Each hot-search entry is an <a> inside a cell with class "td-02".
    hot_topics = soup.select('.td-02 a')
    # Print the 1-based ranked list of topic titles.
    for index, topic in enumerate(hot_topics, start=1):
        print(f"{index}. {topic.text}")


if __name__ == "__main__":
    main()