from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import time
import csv
from selenium.webdriver.chrome.service import Service
# Scrape the title, keyword and publication time of every news entry on the
# first `total_pages` list pages of bohe.cn and save them to health_news.csv.

# Path to the Edge WebDriver executable; change it to wherever your own
# msedgedriver was downloaded.
# NOTE(review): `Service` is imported from selenium.webdriver.chrome.service
# although the browser driven here is Edge; selenium.webdriver.edge.service
# looks like the matching module -- confirm before switching the import.
service = Service(r"D:\python\msedgedriver.exe")
# Start the Edge browser.
driver = webdriver.Edge(service=service)

# Not used anywhere in the visible script; kept in case other code reads them.
ctime = []
ckeyword = []
total_pages = 5
current_page = 1

# The title, keyword and time XPaths of the original script all share this
# prefix, so each node it matches is the common ancestor of one entry's fields.
# Looking the fields up relative to that node keeps the three values of a row
# tied to the same entry even when an entry lacks a keyword or a time.
article_xpath = "/html/body/div[4]/div[1]/div/div[2]/div[1]/div/div"
# NOTE(review): purely positional locator; presumably the "next page" link --
# verify it still points at the right anchor on pages after the first.
next_button_xpath = "/html/body/div[4]/div[1]/div/div[2]/div[2]/p/a[4]"

try:
    # Open the news list page.
    driver.get("https://www.bohe.cn/zx/list/113_1/")

    # Create the CSV file that stores the scraped rows; `with` guarantees it
    # is flushed and closed even if scraping fails half-way.
    with open('health_news.csv', 'w', newline='', encoding='utf-8-sig') as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(['标题', '关键字', '时间'])

        while current_page <= total_pages:
            for article in driver.find_elements(By.XPATH, article_xpath):
                title_links = article.find_elements(By.XPATH, "./h3/a[2]")
                if not title_links:
                    # Container without a title link: not a news entry.
                    continue
                keyword_links = article.find_elements(By.XPATH, "./div[2]/div/a")
                time_tags = article.find_elements(By.XPATH, "./div[2]/time")

                title = title_links[0].text.strip()
                # A missing field becomes an empty cell instead of silently
                # reusing the value of the previous row. Only the first
                # keyword link is kept, matching the one-keyword-per-row CSV.
                keyword = keyword_links[0].text.strip('#') if keyword_links else ''
                timess = time_tags[0].text.strip() if time_tags else ''

                # Echo the title, keyword and time.
                print(f"标题: {title}")
                print(f"关键字: {keyword}")
                print(f"时间: {timess}")
                # Append the row to the CSV file.
                csv_writer.writerow([title, keyword, timess])

            # Go to the next page, except after the last requested one where
            # the extra click and wait would be wasted.
            if current_page < total_pages:
                # find_elements returns an empty list rather than raising
                # when the link is absent.
                next_buttons = driver.find_elements(By.XPATH, next_button_xpath)
                if not next_buttons:
                    # Message: "no next-page button found on page N, stopping early".
                    print(f"第 {current_page} 页未找到下一页按钮，提前结束")
                    break
                next_buttons[0].click()
                time.sleep(2)  # Give the next page time to load.
            current_page += 1
finally:
    # Always shut the browser down, even when scraping raised.
    driver.quit()
# Crawl results (爬取结果)